基于讯飞星火的语音问答

一. 简介

     本项目以讯飞星火 API 为核心能力,使用 Web Speech API 中的 webkitSpeechRecognition 对象实现语音转文字,并通过调用百度 TTS (Text-to-Speech) API 将文本转换成语音,使机器人能够“说话”,从而实现语音输入、语音输出的对话形式。

实现方法:后端基于 Python 的 Flask 框架,前端使用 HTML + CSS,并用 AJAX (通过 XMLHttpRequest 对象) 实现异步请求。

二. 代码

   2.1 前端:

     






    
  
快乐加油站


欢迎来到快乐加油站!

我是一个情绪大模型。请随意提问或闲聊。

我生来就是高山而非溪流,我欲与群峰之巅俯视平庸的沟壑,我生来就是人杰而非草芥,我站在伟人之肩藐视卑微的懦夫,请只管跑下去,天自己会亮

你想播放音乐吗?

快乐机器人
注意:

将 appid、apiKey、apiSecret 替换为自己的密钥,申请方式见上一篇博客。

2.2  后端:

获取大模型的回答并修改模型自定义风格  happyApi.py:

# coding: utf-8
import _thread as thread
import base64
import datetime
import hashlib
import hmac
import json
import ssl
from urllib.parse import urlparse, urlencode
from wsgiref.handlers import format_date_time
import websocket
import os
import pygame

import SparkApi


# Large-model interface
def get_response_from_model(appid, api_key, api_secret, spark_url, domain, question):
    """Ask the Spark model a question and return the text of its reply.

    Runs ``SparkApi.main`` with the given credentials, endpoint, domain and
    question, then passes ``SparkApi.answer`` through
    ``SparkApi.getText("assistant", ...)`` and returns the ``"content"``
    field of the first entry of the result.

    NOTE(review): this relies on ``SparkApi.main`` having filled the
    module attribute ``SparkApi.answer`` by the time it returns — confirm
    against the SparkApi module.
    """
    SparkApi.main(appid, api_key, api_secret, spark_url, domain, question)
    assistant_messages = SparkApi.getText("assistant", SparkApi.answer)
    first_message = assistant_messages[0]
    return first_message["content"]

class Ws_Param(object):
    """Credentials and endpoint for the websocket service, plus URL signing."""

    def __init__(self, APPID, APIKey, APISecret, gpt_url):
        """Store the credentials and split *gpt_url* into host and path."""
        endpoint = urlparse(gpt_url)
        self.APPID = APPID
        self.APIKey = APIKey
        self.APISecret = APISecret
        self.host = endpoint.netloc
        self.path = endpoint.path
        self.gpt_url = gpt_url

    def create_url(self):
        """Return ``gpt_url`` with ``authorization``, ``date`` and ``host`` query parameters.

        The authorization value is the base64 form of a header-style string
        carrying the API key and an HMAC-SHA256 signature (keyed with the API
        secret) over the host, the current date and the request line.
        """
        # HTTP-date string for the current time; it is part of the signed text,
        # so every call yields a different URL.
        date = format_date_time(datetime.datetime.now().timestamp())

        # Text that gets signed: host, date and request line, one per line.
        signature_origin = "\n".join((
            f"host: {self.host}",
            f"date: {date}",
            f"GET {self.path} HTTP/1.1",
        ))
        digest = hmac.new(
            self.APISecret.encode('utf-8'),
            signature_origin.encode('utf-8'),
            digestmod=hashlib.sha256,
        ).digest()
        signature = base64.b64encode(digest).decode('utf-8')

        authorization_origin = (
            f'api_key="{self.APIKey}", '
            'algorithm="hmac-sha256", '
            'headers="host date request-line", '
            f'signature="{signature}"'
        )
        authorization = base64.b64encode(
            authorization_origin.encode('utf-8')
        ).decode('utf-8')

        query = urlencode({
            "authorization": authorization,
            "date": date,
            "host": self.host,
        })
        return self.gpt_url + '?' + query


# Handle a websocket error
def on_error(ws, error):
    """Print the websocket error to stdout; the connection object is not used."""
    print(f"### error: {error!s}")


# Handle the websocket being closed
def on_close(ws, close_status_code, close_msg):
    """Print a fixed banner when the websocket closes; all arguments are ignored."""
    closed_banner = "### closed ###"
    print(closed_banner)


# Handle the websocket connection being established
def on_open(ws):
    """Start a new thread that calls ``run(ws)`` once the connection is open."""
    worker_args = (ws,)
    thread.start_new_thread(run, worker_args)


# Handle a message received on the websocket
def on_message(ws, message):
    """Append the audio carried by one server frame to the output file and,
    on the final frame, play the finished file and close the connection.

    Args:
        ws: The websocket app; ``ws.save_file_name`` is the output audio path
            and ``ws.close()`` is called after the final frame.
        message: JSON text with a ``header`` holding ``code`` and (on
            success) ``status``, and optionally a ``payload`` whose
            ``audio.audio`` field is base64-encoded audio data.

    A non-zero ``code`` is reported on stdout and the frame is otherwise
    ignored. ``status == 2`` marks the final frame.
    """
    message = json.loads(message)
    header = message['header']
    if header['code'] != 0:
        print("### 请求出错: ", message)
        return

    # Persist this frame's audio BEFORE any playback: the final frame
    # (status == 2) can carry audio too, and playing first would leave the
    # last chunk out of what the user hears.
    payload = message.get("payload")
    if payload and payload != "null":
        audio = payload.get("audio")
        if audio:
            with open(ws.save_file_name, 'ab') as f:
                f.write(base64.b64decode(audio["audio"]))

    if header['status'] == 2:
        print("### 合成完毕")
        try:
            # Play the completed file and block until playback finishes.
            pygame.mixer.init()
            pygame.mixer.music.load(ws.save_file_name)
            pygame.mixer.music.play()
            clock = pygame.time.Clock()  # one clock reused for every poll
            while pygame.mixer.music.get_busy():
                clock.tick(10)
        finally:
            # Close even if playback fails so the caller's run_forever()
            # is not left waiting on an idle connection.
            ws.close()


def run(ws, *args):
    """Build the synthesis request from attributes on *ws* and send it as JSON.

    Reads ``ws.appid``, ``ws.vcn`` (the voice name) and ``ws.text``; the text
    is sent base64-encoded from its UTF-8 bytes. Extra positional arguments
    are accepted and ignored.
    """
    header = {
        "app_id": ws.appid,
        "status": 0,
    }

    oral_options = {
        "spark_assist": 1,
        "oral_level": "mid",
    }

    # Format of the audio requested from the service.
    audio_format = {
        "encoding": "lame",
        "sample_rate": 16000,
        "channels": 1,
        "bit_depth": 16,
        "frame_size": 0,
    }
    pybuf_format = {
        "encoding": "utf8",
        "compress": "raw",
        "format": "plain",
    }
    tts_options = {
        "vcn": ws.vcn,
        "speed": 66,
        "volume": 50,
        "pitch": 50,
        "bgs": 0,
        "reg": 0,
        "rdn": 0,
        "rhy": 0,
        "scn": 5,
        "version": 0,
        "L5SilLen": 0,
        "ParagraphSilLen": 0,
        "audio": audio_format,
        "pybuf": pybuf_format,
    }

    encoded_text = base64.b64encode(ws.text.encode('utf-8')).decode('utf-8')
    text_payload = {
        "encoding": "utf8",
        "compress": "raw",
        "format": "json",
        "status": 0,
        "seq": 0,
        "text": encoded_text,
    }

    request = {
        "header": header,
        "parameter": {
            "oral": oral_options,
            "tts": tts_options,
        },
        "payload": {
            "text": text_payload,
        },
    }
    ws.send(json.dumps(request))


def main(appid, api_secret, api_key, url, vcn, save_file_name):
    """Interactive loop: read a question, get the model's answer, speak it.

    For every question typed on stdin (until ``exit``), the answer is fetched
    with ``get_response_from_model`` using the settings from
    ``SparkApi.config()``, then sent to the speech-synthesis websocket; the
    resulting audio is written to *save_file_name* and played by
    ``on_message``.

    Args:
        appid: Application id for the synthesis service.
        api_secret: API secret. Note the order: ``api_secret`` comes before
            ``api_key`` here, the opposite of ``Ws_Param``.
        api_key: API key.
        url: Websocket endpoint of the synthesis service.
        vcn: Voice name placed in the request by ``run``.
        save_file_name: Path of the audio file to (re)create for each answer.
    """
    wsParam = Ws_Param(appid, api_key, api_secret, url)
    websocket.enableTrace(False)

    while True:
        user_input = input("请输入要询问的问题 (输入 'exit' 退出程序): ")
        if user_input.lower() == 'exit':
            break
        # Fetch the required parameters from the configuration
        config = SparkApi.config()
        model_response = get_response_from_model(
            config["appid"],
            config["api_key"],
            config["api_secret"],
            config["Spark_url"],
            config["domain"],
            user_input
        )

        # on_message opens the file in append mode, so start every answer
        # from an empty file; otherwise each reply would replay all the
        # earlier audio before the new one.
        if os.path.exists(save_file_name):
            os.remove(save_file_name)

        # The signed URL embeds the current date, so sign a fresh one for
        # each connection instead of reusing the one made at start-up
        # (a stale date may be rejected by the server).
        ws = websocket.WebSocketApp(
            wsParam.create_url(),
            on_message=on_message,
            on_error=on_error,
            on_close=on_close,
            on_open=on_open,
        )
        ws.appid = appid
        ws.vcn = vcn
        ws.save_file_name = save_file_name
        ws.text = model_response
        try:
            # NOTE(review): CERT_NONE disables TLS certificate verification.
            ws.run_forever(sslopt={"cert_reqs": ssl.CERT_NONE})
        finally:
            # Release pygame's audio resources (and its hold on the file)
            # before the next question.
            pygame.mixer.quit()

if __name__ == "__main__":
    # Placeholder credentials: replace the "xxx" values with your own keys.
    tts_settings = {
        "appid": "xxx",
        "api_secret": "xxx",
        "api_key": "xxx",
        "url": "wss://cbm01.cn-huabei-1.xf-yun.com/v1/private/medd90fec",
        # Speaker (voice) parameter
        "vcn": "x4_lingxiaoxuan_oral",
        "save_file_name": "2.mp3",
    }
    main(**tts_settings)


完整代码可私信我获取,或在下一篇博客中下载安装包。

你可能感兴趣的:(语音识别,人工智能,语言模型,文心一言,opencv)