百度语音识别api使用python进行调用

2019-11-28 16:31:37 浏览数 (1)

百度语音现在是比较方便的接口,具体说明请看官方文档。本文分两个部分:先是使用python实现录音,然后再使用百度语音api上传录音并进行识别。

首先是实现录音功能,因为百度语音识别对录音品质有一些要求。下文的代码会以当前时间为文件名生成录音文件,并产生一个gui界面,点击按钮后开始录音。

百度语音REST API支持整段录音文件的识别,对录音格式有一定的要求,支持语音识别控件:集成提示音、音量反馈动效整套交互的对话框控件,方便开发者快速集成;

原始PCM的录音参数必须符合8k/16k采样率、16bit位深、单声道,支持的压缩格式有:pcm(不压缩)、wav、opus、speex、amr、x-flac。

语音识别接口支持POST方式。目前API仅支持整段语音识别的模式,即需要上传整段语音进行识别。语音数据上传方式有两种:隐式发送和显式发送。原始语音的录音格式目前只支持评测8k/16k采样率、16bit位深的单声道语音。压缩格式支持:pcm(不压缩)、wav、opus、speex、amr、x-flac。系统支持语言种类:中文(zh)、粤语(ct)、英文(en)。

Python

#!/usr/bin/env python
# coding=utf-8
"""Record a short clip from the microphone and save it as a WAV file.

Running the script opens a small Tk window; clicking its button records
about ``TIME`` seconds of 16-bit mono audio and writes it to a file named
after the current time, e.g. ``2019-11-28_16_31_37.wav``.

The code is written to run on both Python 2 and Python 3.
"""
import wave
from datetime import datetime

import numpy as np  # kept from the original script; currently unused
from pyaudio import PyAudio, paInt16

try:  # Python 2
    from Tkinter import Button, Label, Tk
except ImportError:  # Python 3
    from tkinter import Button, Label, Tk

# Recording parameters (chosen to satisfy the Baidu API: 8k/16k, 16 bit, mono).
NUM_SAMPLES = 2000  # frames read from the stream per chunk
framerate = 8000    # sampling rate in Hz
channels = 1        # mono
sampwidth = 2       # bytes per sample (16 bit)
TIME = 10           # recording length in seconds


def save_wave_file(filename, data):
    """Write the recorded chunks to a WAV file.

    Args:
        filename: Path of the WAV file to create.
        data: Iterable of byte strings holding the raw PCM frames.
    """
    wf = wave.open(filename, 'wb')
    try:
        wf.setnchannels(channels)
        wf.setsampwidth(sampwidth)
        wf.setframerate(framerate)
        # b"".join handles the byte chunks on both Python 2 and Python 3.
        wf.writeframes(b"".join(data))
    finally:
        wf.close()


def my_button(root, label_text, button_text, button_func):
    """Add a label and, below it, a button to ``root``.

    Args:
        root: Parent Tk widget.
        label_text: Text shown on the label.
        button_text: Text shown on the button.
        button_func: Callable invoked when the button is clicked.
    """
    label = Label(root)
    label['text'] = label_text
    label.pack()

    button = Button(root)
    button['text'] = button_text
    button['command'] = button_func
    button.pack()


def record_wave():
    """Record ``TIME`` seconds of audio and save it to a timestamped file.

    The call blocks until recording has finished, so the window does not
    respond while a recording is in progress.
    """
    # Number of NUM_SAMPLES-sized chunks that make up TIME seconds.
    chunk_count = TIME * framerate // NUM_SAMPLES
    save_buffer = []

    pa = PyAudio()
    try:
        stream = pa.open(format=paInt16, channels=channels, rate=framerate,
                         input=True, frames_per_buffer=NUM_SAMPLES)
        try:
            for _ in range(chunk_count):
                save_buffer.append(stream.read(NUM_SAMPLES))
                print('.')  # progress indicator, one dot per chunk
        finally:
            # Always release the audio device, even if a read fails.
            stream.stop_stream()
            stream.close()
    finally:
        pa.terminate()

    filename = datetime.now().strftime("%Y-%m-%d_%H_%M_%S") + ".wav"
    save_wave_file(filename, save_buffer)
    print("%s saved" % filename)


def main():
    """Build the one-button window and run the Tk event loop."""
    root = Tk()
    my_button(root, "Record a wave", "clik to record", record_wave)
    root.mainloop()


if __name__ == "__main__":
    main()

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465

#!/usr/bin/env python
# coding=utf-8
"""Record a short clip from the microphone and save it as a WAV file.

Running the script opens a small Tk window; clicking its button records
about ``TIME`` seconds of 16-bit mono audio and writes it to a file named
after the current time, e.g. ``2019-11-28_16_31_37.wav``.

The code is written to run on both Python 2 and Python 3.
"""
import wave
from datetime import datetime

import numpy as np  # kept from the original script; currently unused
from pyaudio import PyAudio, paInt16

try:  # Python 2
    from Tkinter import Button, Label, Tk
except ImportError:  # Python 3
    from tkinter import Button, Label, Tk

# Recording parameters (chosen to satisfy the Baidu API: 8k/16k, 16 bit, mono).
NUM_SAMPLES = 2000  # frames read from the stream per chunk
framerate = 8000    # sampling rate in Hz
channels = 1        # mono
sampwidth = 2       # bytes per sample (16 bit)
TIME = 10           # recording length in seconds


def save_wave_file(filename, data):
    """Write the recorded chunks to a WAV file.

    Args:
        filename: Path of the WAV file to create.
        data: Iterable of byte strings holding the raw PCM frames.
    """
    wf = wave.open(filename, 'wb')
    try:
        wf.setnchannels(channels)
        wf.setsampwidth(sampwidth)
        wf.setframerate(framerate)
        # b"".join handles the byte chunks on both Python 2 and Python 3.
        wf.writeframes(b"".join(data))
    finally:
        wf.close()


def my_button(root, label_text, button_text, button_func):
    """Add a label and, below it, a button to ``root``.

    Args:
        root: Parent Tk widget.
        label_text: Text shown on the label.
        button_text: Text shown on the button.
        button_func: Callable invoked when the button is clicked.
    """
    label = Label(root)
    label['text'] = label_text
    label.pack()

    button = Button(root)
    button['text'] = button_text
    button['command'] = button_func
    button.pack()


def record_wave():
    """Record ``TIME`` seconds of audio and save it to a timestamped file.

    The call blocks until recording has finished, so the window does not
    respond while a recording is in progress.
    """
    # Number of NUM_SAMPLES-sized chunks that make up TIME seconds.
    chunk_count = TIME * framerate // NUM_SAMPLES
    save_buffer = []

    pa = PyAudio()
    try:
        stream = pa.open(format=paInt16, channels=channels, rate=framerate,
                         input=True, frames_per_buffer=NUM_SAMPLES)
        try:
            for _ in range(chunk_count):
                save_buffer.append(stream.read(NUM_SAMPLES))
                print('.')  # progress indicator, one dot per chunk
        finally:
            # Always release the audio device, even if a read fails.
            stream.stop_stream()
            stream.close()
    finally:
        pa.terminate()

    filename = datetime.now().strftime("%Y-%m-%d_%H_%M_%S") + ".wav"
    save_wave_file(filename, save_buffer)
    print("%s saved" % filename)


def main():
    """Build the one-button window and run the Tk event loop."""
    root = Tk()
    my_button(root, "Record a wave", "clik to record", record_wave)
    root.mainloop()


if __name__ == "__main__":
    main()

完成录音后,查看文件目录中是否已经出现一个.wav格式的文件。一次录音大概是十秒钟。然后将文件名修改为1.wav。

执行下面的程序。有部分需要按照你的id和key进行修改噢。

Python

# encoding=utf-8
"""Send a recorded WAV file to the Baidu speech recognition REST API.

The script first exchanges the application's API key and secret key for an
access token, then posts the raw PCM frames of ``1.wav`` to the recognition
endpoint and prints the server's response.

The code is written to run on both Python 2 and Python 3.
"""
import base64
import json
import urllib
import wave
from contextlib import closing

import pycurl

try:  # Python 2
    import urllib2
except ImportError:  # Python 3
    import urllib.request as urllib2


def get_token():
    """Fetch an access token using the API key and secret key.

    Fill in your own ``api_key`` and ``secret_key`` before running.

    Returns:
        The ``access_token`` value from the JSON response.
    """
    api_key = "Ll0c53MSac6GBOtpg22ZSGAU**"
    secret_key = "44c8af396038a24e34936227d4a19dc2**"

    auth_url = ("https://openapi.baidu.com/oauth/2.0/token"
                "?grant_type=client_credentials&client_id=" + api_key +
                "&client_secret=" + secret_key)

    # closing() releases the connection on both Python 2 and Python 3.
    with closing(urllib2.urlopen(auth_url)) as res:
        json_data = res.read()
    return json.loads(json_data)['access_token']


def dump_res(buf):
    """Print a chunk of the server response (pycurl write callback).

    Args:
        buf: Response data handed over by pycurl.
    """
    if not isinstance(buf, str):
        # Python 3 delivers bytes; decode so the text prints readably.
        buf = buf.decode('utf-8', 'replace')
    print(buf)


def use_cloud(token):
    """Post the audio in ``1.wav`` to the recognition server.

    The response is not returned; it is printed by ``dump_res`` as it
    arrives.

    Args:
        token: Access token obtained from ``get_token``.
    """
    # '1.wav' is the previously recorded audio clip.
    with closing(wave.open('1.wav', 'rb')) as fp:
        rate = fp.getframerate()
        audio_data = fp.readframes(fp.getnframes())
    # Use the real payload size instead of assuming 2 bytes per frame.
    f_len = len(audio_data)

    cuid = "7519663**"  # your product id
    srv_url = ('http://vop.baidu.com/server_api'
               '?cuid=' + cuid + '&token=' + token)
    http_header = [
        'Content-Type: audio/pcm; rate=%d' % rate,
        'Content-Length: %d' % f_len,
    ]

    c = pycurl.Curl()
    try:
        c.setopt(pycurl.URL, str(srv_url))  # curl doesn't support unicode
        c.setopt(c.HTTPHEADER, http_header)  # must be a list, not a dict
        c.setopt(c.POST, 1)
        c.setopt(c.CONNECTTIMEOUT, 30)
        c.setopt(c.TIMEOUT, 30)
        c.setopt(c.WRITEFUNCTION, dump_res)
        c.setopt(c.POSTFIELDS, audio_data)
        c.setopt(c.POSTFIELDSIZE, f_len)
        c.perform()  # perform() has no return value; output goes to dump_res
    finally:
        c.close()


if __name__ == "__main__":
    token = get_token()  # obtain the access token
    use_cloud(token)     # upload the audio for recognition

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354

# encoding=utf-8
"""Send a recorded WAV file to the Baidu speech recognition REST API.

The script first exchanges the application's API key and secret key for an
access token, then posts the raw PCM frames of ``1.wav`` to the recognition
endpoint and prints the server's response.

The code is written to run on both Python 2 and Python 3.
"""
import base64
import json
import urllib
import wave
from contextlib import closing

import pycurl

try:  # Python 2
    import urllib2
except ImportError:  # Python 3
    import urllib.request as urllib2


def get_token():
    """Fetch an access token using the API key and secret key.

    Fill in your own ``api_key`` and ``secret_key`` before running.

    Returns:
        The ``access_token`` value from the JSON response.
    """
    api_key = "Ll0c53MSac6GBOtpg22ZSGAU**"
    secret_key = "44c8af396038a24e34936227d4a19dc2**"

    auth_url = ("https://openapi.baidu.com/oauth/2.0/token"
                "?grant_type=client_credentials&client_id=" + api_key +
                "&client_secret=" + secret_key)

    # closing() releases the connection on both Python 2 and Python 3.
    with closing(urllib2.urlopen(auth_url)) as res:
        json_data = res.read()
    return json.loads(json_data)['access_token']


def dump_res(buf):
    """Print a chunk of the server response (pycurl write callback).

    Args:
        buf: Response data handed over by pycurl.
    """
    if not isinstance(buf, str):
        # Python 3 delivers bytes; decode so the text prints readably.
        buf = buf.decode('utf-8', 'replace')
    print(buf)


def use_cloud(token):
    """Post the audio in ``1.wav`` to the recognition server.

    The response is not returned; it is printed by ``dump_res`` as it
    arrives.

    Args:
        token: Access token obtained from ``get_token``.
    """
    # '1.wav' is the previously recorded audio clip.
    with closing(wave.open('1.wav', 'rb')) as fp:
        rate = fp.getframerate()
        audio_data = fp.readframes(fp.getnframes())
    # Use the real payload size instead of assuming 2 bytes per frame.
    f_len = len(audio_data)

    cuid = "7519663**"  # your product id
    srv_url = ('http://vop.baidu.com/server_api'
               '?cuid=' + cuid + '&token=' + token)
    http_header = [
        'Content-Type: audio/pcm; rate=%d' % rate,
        'Content-Length: %d' % f_len,
    ]

    c = pycurl.Curl()
    try:
        c.setopt(pycurl.URL, str(srv_url))  # curl doesn't support unicode
        c.setopt(c.HTTPHEADER, http_header)  # must be a list, not a dict
        c.setopt(c.POST, 1)
        c.setopt(c.CONNECTTIMEOUT, 30)
        c.setopt(c.TIMEOUT, 30)
        c.setopt(c.WRITEFUNCTION, dump_res)
        c.setopt(c.POSTFIELDS, audio_data)
        c.setopt(c.POSTFIELDSIZE, f_len)
        c.perform()  # perform() has no return value; output goes to dump_res
    finally:
        c.close()


if __name__ == "__main__":
    token = get_token()  # obtain the access token
    use_cloud(token)     # upload the audio for recognition

再执行这个python脚本,等待一小段时间就可以看到返回的识别结果。

就是这样:

主要代码的思路流程很清晰的.

百度语音识别通过 REST API 的方式给开发者提供一个通用的 HTTP 接口,基于该接口,开发者可以轻松的获取语音识别能力。SDK中只提供了PHP、C和JAVA的相关样例,然而个人以为,使用Python开发难度更低,本文描述了简单使用Python调用百度语音识别服务 REST API 的简单样例。

注册开发者帐号和创建应用不再赘述,百度的REST API在调用过程基本分为三步:

  1. 获取token
  2. 提交数据
  3. 处理JSON

0 人点赞