调用讯飞API朗读新浪国际新闻版块最新消息

  1. 调用讯飞API将文字转为语音文件
  2. 爬取新浪国际新闻版块最新消息保存到xlsx
  3. 讯飞API对单次合成的文字长度有限制
  4. python程序打包成exe
  5. 语音即时播放

定制新闻朗读助手(调用科大讯飞API)

调用讯飞API将文字转为语音文件

#-*- coding: utf-8 -*-
import pyaudio
import wave
import os

    
def play_xunfei(wavfilename):
    """Play a WAV file on the default audio output device.

    Args:
        wavfilename: Path of the WAV file to play.

    The wave reader, the output stream and the PyAudio instance are released
    on every exit path, including when playback raises.
    """
    CHUNK = 1024  # number of frames read and written per iteration

    wf = wave.open(wavfilename, 'rb')
    try:
        p = pyaudio.PyAudio()
        try:
            # Open an output stream whose format matches the file's parameters.
            stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
                            channels=wf.getnchannels(),
                            rate=wf.getframerate(),
                            frames_per_buffer=CHUNK,
                            output=True)
            try:
                # Feed the file to the stream chunk by chunk until it is exhausted.
                data = wf.readframes(CHUNK)
                while len(data) > 0:
                    stream.write(data)
                    data = wf.readframes(CHUNK)
            finally:
                stream.stop_stream()
                stream.close()
        finally:
            p.terminate()
    finally:
        wf.close()
        
        
        
import requests
import time
import hashlib
import base64
#  Endpoint of the speech-synthesis web API
URL = "http://api.xfyun.cn/v1/service/v1/tts"
#  Audio encoding ("raw" yields pcm/wav audio, "lame" yields MP3 audio)
AUE = "raw"
#  Application APPID (must be a webapi-type application with the speech-synthesis service enabled; see this post on how to create a webapi application: http://bbs.xfyun.cn/forum.php?mod=viewthread&tid=36481
#  NOTE(review): APPID and API_KEY are hard-coded credentials committed with the source; consider loading them from configuration.
APPID = "5ce265c9"
#  API key (for a webapi-type application with the synthesis service enabled: console -> my applications -> speech synthesis -> apikey of that service)
API_KEY = "dedd145d73ca0e1fd5e70a77f38365a1"

# 组装http请求头
def getHeader():
    """Build the HTTP headers that authenticate a synthesis request.

    Returns:
        dict: Headers carrying the current Unix timestamp, the base64-encoded
        synthesis parameters, the application id and the MD5 hex digest of
        API key + timestamp + encoded parameters.
    """
    timestamp = str(int(time.time()))

    # Synthesis parameters, written out by hand as a compact JSON string.
    synth_params = '{"aue":"' + AUE + '","auf":"audio/L16;rate=16000","voice_name":"xiaoyan","engine_type":"intp65"}'
    encoded_params = base64.b64encode(synth_params.encode('utf-8')).decode('utf-8')

    # The checksum signs the key, the timestamp and the encoded parameters together.
    signed_text = API_KEY + timestamp + encoded_params
    digest = hashlib.md5(signed_text.encode('utf-8')).hexdigest()

    return {
        'X-CurTime': timestamp,
        'X-Param': encoded_params,
        'X-Appid': APPID,
        'X-CheckSum': digest,
        'X-Real-Ip': '127.0.0.1',
        'Content-Type': 'application/x-www-form-urlencoded; charset=utf-8',
    }


def getBody(text):
    """Wrap the text to synthesize in the form payload sent to the API.

    Args:
        text: Text to synthesize.

    Returns:
        dict: A single-entry mapping with the text stored under ``text``.
    """
    return dict(text=text)


def writeFile(file, content):
    """Write binary content to a file, replacing any existing content.

    Args:
        file: Destination path.
        content: Bytes to write.
    """
    # The with-statement closes the file on exit, so no explicit close() is needed.
    with open(file, 'wb') as f:
        f.write(content)

#  待合成文本内容
#with open('t2v_test.txt','r',encoding='utf-8') as f:
#    line = f.read()
#r = requests.post(URL, headers=getHeader(), data=getBody("内容"))
#待合成文本,使用utf-8编码,需urlencode,长度小于1000字节(UTF-8编码下一个汉字通常占3个字节)
def readtxt_by_xunfei(readcontent):
    """Synthesize text through the web API and play the returned audio.

    Args:
        readcontent: Text to read aloud.

    When the response carries audio and AUE is "raw", the audio is saved as
    ``<sid>.wav``, played, and the file is removed afterwards (also when
    playback fails). For any other AUE the audio is only saved under
    ``audio/``, because the player reads files with the ``wave`` module.
    When the response is not audio, its body is printed instead.
    """
    print('讯飞正在支持朗读助手')
    r = requests.post(URL, headers=getHeader(), data=getBody(readcontent))

    # .get() avoids a KeyError when the header is absent; treat that as an error reply too.
    if r.headers.get('Content-Type') != "audio/mpeg":
        # Error code reference: https://www.xfyun.cn/document/error-code
        print(r.text)
        return

    sid = r.headers['sid']

    if AUE != "raw":
        # MP3 output: keep the file, but do not hand it to the WAV player.
        os.makedirs("audio", exist_ok=True)
        writeFile("audio/" + "xiaoyan" + ".mp3", r.content)
        print("success, sid = " + sid)
        return

    wavfilename = sid + ".wav"
    writeFile(wavfilename, r.content)
    print("success, sid = " + sid)
    try:
        play_xunfei(wavfilename)
    finally:
        # The WAV file is only a temporary playback buffer.
        os.remove(wavfilename)
def readtxt_by_SAPI(content):
    """Read text aloud through the ``SAPI.SpVoice`` COM object.

    Args:
        content: Text to speak.
    """
    print('SAPI正在支持朗读助手')
    # Imported lazily so the dependency is only needed when this engine is chosen.
    import win32com.client
    win32com.client.Dispatch("SAPI.SpVoice").Speak(content)
    
def readtxt_by_pyttsx3(content):
    """Read text aloud with the pyttsx3 engine at a slightly raised volume.

    Args:
        content: Text to speak.
    """
    print('pyttsx3正在支持朗读助手')
    # Imported lazily so the dependency is only needed when this engine is chosen.
    import pyttsx3
    engine = pyttsx3.init()
    volume = engine.getProperty('volume')
    # Volume is a 0.0-1.0 scale, so cap the boost instead of overshooting it.
    engine.setProperty('volume', min(1.0, volume + 0.25))
    engine.say(content)
    engine.runAndWait()
    engine.stop()
def readnews(titlelist):
    """Read every headline aloud, one after another.

    Args:
        titlelist: Iterable of headline strings.
    """
    # Other engines can be swapped in here: readtxt_by_SAPI or readtxt_by_pyttsx3.
    for headline in titlelist:
        readtxt_by_xunfei(headline)
import paqu_sina

# Fetch the latest headlines and read them aloud.
readnews(paqu_sina.newtitlelist())

爬取新浪国际新闻版块最新消息

将该脚本命名为paqu_sina.py

import requests
from bs4 import BeautifulSoup
import time
import json
import re
import pandas
import sys
# Force the interpreter's default string encoding to UTF-8 when it is not already.
# NOTE(review): the bare `reload` call and `sys.setdefaultencoding` look like Python 2 idioms;
# confirm whether this guard is still needed on the interpreter this script targets.
if sys.getdefaultencoding() != 'utf-8':
    reload(sys)
    sys.setdefaultencoding('utf-8')
def getnewcontent(url):
    """Scrape one news article page and its comment count.

    Args:
        url: Article URL. The news id is taken from its last path segment
            after removing a leading ``doc-i`` and a trailing ``.shtml``.

    Returns:
        dict: Keys ``title``, ``date``, ``source``, ``content`` (paragraph
        texts joined by newlines), ``author`` and ``comment`` (total comment
        count reported by the comment service).
    """
    result = {}
    info = requests.get(url)
    info.encoding = 'utf-8'
    html = BeautifulSoup(info.text, 'html.parser')
    result['title'] = html.select('.second-title')[0].text
    result['date'] = html.select('.date')[0].text
    result['source'] = html.select('.source')[0].text

    # Every <p> under .article except the last one makes up the body text.
    paragraphs = [p.text.strip() for p in html.select('.article p')[:-1]]
    result['content'] = '\n'.join(paragraphs)

    # str.lstrip/rstrip remove a *set of characters*, not a prefix/suffix, and
    # could eat the start of a name or id; exact affixes are sliced off instead.
    author = html.select('.show_author')[0].text
    author_label = '责任编辑:'
    if author.startswith(author_label):
        author = author[len(author_label):]
    result['author'] = author

    newsid = url.split('/')[-1]
    if newsid.endswith('.shtml'):
        newsid = newsid[:-len('.shtml')]
    if newsid.startswith('doc-i'):
        newsid = newsid[len('doc-i'):]

    commenturl = 'http://comment5.news.sina.com.cn/page/info?version=1&format=json&channel=gj&newsid=comos-{}&group=undefined&compress=0&ie=utf-8&oe=utf-8&page=1&page_size=3&t_size=3&h_size=3&thread=1&callback=jsonp_1536041889769&_=1536041889769'
    comments = requests.get(commenturl.format(newsid))
    # The reply is JSONP, i.e. callback(<json>): keep what lies between the
    # first '(' and the last ')'.
    raw = comments.text
    jd = json.loads(raw[raw.index('(') + 1:raw.rindex(')')])
    result['comment'] = jd['result']['count']['total']  # total number of comments
    return result
def getnewslink(url):
    """Fetch one page of the news list and scrape every article it links to.

    Args:
        url: News-list URL whose response is JSON wrapped in
            ``newsloadercallback(...)``.

    Returns:
        list: One dict per listed article, as produced by ``getnewcontent``.
    """
    payload = requests.get(url).text.strip()
    # Unwrap the JSONP callback by exact prefix and closing parenthesis;
    # str.lstrip would treat the callback name as a set of characters.
    prefix = 'newsloadercallback('
    if payload.startswith(prefix) and payload.endswith(')'):
        payload = payload[len(prefix):-1]
    jd = json.loads(payload)
    return [getnewcontent(item['url']) for item in jd['result']['data']]
def getdata():
    """Collect the article records from pages 1 and 2 of the news list.

    Returns:
        list: The records returned by ``getnewslink`` for each page,
        concatenated in page order.
    """
    list_url = 'https://interface.sina.cn/news/get_news_by_channel_new_v2018.d.html?cat_1=51923&show_num=27&level=1,2&page={}&callback=newsloadercallback&_=1536044408917'
    # The page number is substituted into the {} placeholder of the URL.
    return [
        record
        for page in (1, 2)
        for record in getnewslink(list_url.format(page))
    ]

    
def newtitlelist():
    """Return the titles of the scraped news articles.

    Returns:
        list: The ``title`` value of every record from ``getdata()``, in order.
    """
    print('正在载入新闻内容,请稍后。。。')
    new_info = getdata()
    # Optional export: pandas.DataFrame(new_info).to_excel('weibonews.xlsx')
    # writes the records to a spreadsheet.
    return [item['title'] for item in new_info]
   

安装pyinstaller打包exe

1、安装pyinstaller(需要先安装pip):pip install pyinstaller

2、定位到pyinstaller.exe所在文件夹(一般在python下的“scripts”文件夹下)

(温馨提示:可以用鼠标把文件拖进CMD里面)

3、再添加上你要转换的文件地址(两者之间有空格)
调用讯飞API朗读新浪国际新闻版块最新消息_第1张图片
pyinstaller.exe后面如果加上-F就是打包为一个exe文件(文件会比较大),如果不加就会有很多库文件;加上-w就是打包为没有cmd窗口的exe,不加运行时就会出现cmd窗口。(加不加凭个人喜好)
例如
被打包的文件: xx.py, 人人皆知.
打包的命令: pyinstaller -D xx.py
打包的console提示: 见随后的引述块
打包发布exe时的纪律(注意事项):
很简单的py脚本. Simple is better than complex. 应该遵守Python的规则.
所以在打包为exe时, 应该尽量减少没有必要的python包的导入. 否则吃的太胖
可以运行的exe文件位于: dist 目录下, 而不是build目录下的.

运行
在这里插入图片描述

离线可选择其他语音合成引擎

pyttsx3模块初探及实战项目

import pyttsx3

# Create the speech engine used by the rest of this demo.
tts = pyttsx3.init()

""" RATE"""
# Show the current speaking rate, then set a new one.
speaking_rate = tts.getProperty('rate')
print(speaking_rate)
tts.setProperty('rate', 125)

"""VOLUME"""
# Show the current volume level (scale from 0 to 1), then set it.
level = tts.getProperty('volume')
print(level)
tts.setProperty('volume', 1.0)

"""VOICE"""
# Select the voice at index 1 of the installed voices; a different index picks a different voice.
installed_voices = tts.getProperty('voices')
tts.setProperty('voice', installed_voices[1].id)

# Queue both sentences, then let the engine process the queue.
for sentence in ("Hello World!", 'My current speaking rate is ' + str(speaking_rate)):
    tts.say(sentence)
tts.runAndWait()
tts.stop()
import win32com.client

# Read a Chinese text file aloud with the SAPI.SpVoice COM object.
# The file is read completely and closed first, so it is not held open while speaking.
with open('t2v_test.txt', 'r', encoding='utf-8') as f:
    line = f.read()

# A single voice object is created and used for the whole text.
speaker = win32com.client.Dispatch("SAPI.SpVoice")
speaker.Speak(line)

你可能感兴趣的:(趣味应用)