一、准备工作
(1)开通腾讯云 https://cloud.tencent.com/
(2)腾讯云控制台开通实时语音权限 https://console.cloud.tencent.com/asr
(3)在控制台创建并获取 API 密钥(SecretId / SecretKey) https://console.cloud.tencent.com/cam/capi
内容 | 说明 |
---|---|
支持语言 | 中文普通话、英文、粤语、韩语、日语、泰语、上海话方言 |
支持行业 | 通用、金融、游戏、教育、医疗 |
音频属性 | 采样率:16000Hz或8000Hz、采样精度:16bits、声道:单声道 |
音频格式 | wav、pcm、opus、speex、silk、mp3、m4a、aac |
数据长度 | 音频流中每个数据包的音频分片建议为200ms,8k采样率对应的音频分片大小为3200字节,16k采样率对应的音频分片大小为6400字节 |
二、代码(HTTP、Websocket)
HTTP 协议代码如下(参考文档:https://cloud.tencent.com/document/product/1093/35799)。需注意:HTTP 协议的实时语音识别接口目前官方已下线。
<?php
//实时语音功能
/**
 * Demo client for the real-time speech recognition HTTP endpoint (asr/v1).
 *
 * Reads an audio file, cuts it into fixed-size slices and POSTs each slice
 * with a signed query string, echoing every response body.
 */
class voiceTest{
    // -------------- Required: obtain these from the Tencent Cloud console --------------
    const APPID = "appid"; // must be configured
    const SECRET_ID = "秘钥id"; // must be configured
    const SECRET_KEY = "秘钥key"; // must be configured
    const AGREEMENT = "https";
    const VOICE_URL = "asr.cloud.tencent.com/asr/v1/";
    const HTTPRequestMethod = "POST";

    // --------------- Optional: adjust as needed ---------------
    /**
     * Engine model type.
     *   Telephone scenarios:
     *     - 8k_zh:         8k Mandarin Chinese, general
     *     - 8k_zh_finance: 8k finance-domain model
     *   Non-telephone scenarios:
     *     - 16k_zh:    16k Mandarin Chinese, general
     *     - 16k_en:    16k English
     *     - 16k_ca:    16k Cantonese
     *     - 16k_ko:    16k Korean
     *     - 16k_zh-TW: 16k Mandarin, Traditional Chinese output
     *     - 16k_ja:    16k Japanese
     */
    static $ENGINE_MODEL_TYPE = '16k_zh';
    // Result delivery: 0 = synchronous, returns all intermediate results; 1 = returned with the last packet
    static $RES_TYPE = 1;
    // 1 = real-time streaming recognition
    static $SUB_SERVICE_TYPE = 1;
    // Text encoding of the recognition result: 0 = UTF-8, 1 = GB2312, 2 = GBK, 3 = BIG5
    static $RESULT_TEXT_FORMAT = 0;
    // Audio encoding: 1 = wav, 4 = sp, 6 = silk
    // NOTE(review): 8 is not among the values listed above - confirm against the API reference.
    static $VOICE_FORMAT = 8;
    // Hot-word table id; sent as "hotword_id" only when non-empty
    static $HOT_WORD_ID = "";
    // VAD must be enabled when the stream is longer than 60 seconds. 0 = off, 1 = on.
    static $NEEDVAD = 0;
    // Silence threshold (ms) for sentence splitting. Only sent when needvad is 1,
    // the value is within 150-2000 and the engine model is 8k_zh.
    static $VAD_SILENCE_TIME = 2000;
    // Defaults to 0
    static $SOURCE = 0;
    // Post-processing parameters
    static $FILTER_DIRTY = 0;
    static $FILTER_MODAL = 0;
    static $FILTER_PUNC = 0;
    static $CONVERT_NUM_MODE = 0;
    static $WORD_INFO = 0;
    // Slice length in bytes (cutlength < 200000)
    static $CUTLENGTH = 60000;

    /**
     * Sends the audio file slice by slice and echoes each response followed by a newline.
     *
     * @param string $pathFile path of the audio file to recognise
     * @return void
     * @throws \InvalidArgumentException when self::$CUTLENGTH is not a positive number
     * @throws \RuntimeException when the audio file cannot be read
     */
    public static function voice($pathFile){
        if (self::$CUTLENGTH <= 0) {
            throw new \InvalidArgumentException("CUTLENGTH must be a positive number of bytes");
        }
        // Fail loudly instead of silently sending nothing.
        if (!is_file($pathFile) || !is_readable($pathFile)) {
            throw new \RuntimeException("Audio file is not readable: " . $pathFile);
        }
        $voice_data = file_get_contents($pathFile);
        if ($voice_data === false) {
            throw new \RuntimeException("Failed to read audio file: " . $pathFile);
        }

        // Query-string parameters. The appid is part of the URL path, not of the
        // query string, so it is not included here.
        $timestamp = time();
        $httpUrlParams =
            [
                "projectid" => 0,
                "secretid" => self::SECRET_ID,
                "sub_service_type" => self::$SUB_SERVICE_TYPE,
                "engine_model_type" => self::$ENGINE_MODEL_TYPE,
                "result_text_format" => self::$RESULT_TEXT_FORMAT,
                "res_type" => self::$RES_TYPE,
                "voice_format" => self::$VOICE_FORMAT,
                "needvad" => self::$NEEDVAD,
                "source" => self::$SOURCE,
                // 16-character string identifying this audio stream, generated by the caller
                "voice_id" => self::getRandomString(16),
                "timestamp" => $timestamp,
                // Signature expiry: one day after the request timestamp
                "expired" => $timestamp + 24 * 60 * 60,
                "nonce" => rand(1, 100000), // random positive integer
                "filter_dirty" => self::$FILTER_DIRTY,
                "filter_modal" => self::$FILTER_MODAL,
                "filter_punc" => self::$FILTER_PUNC,
                "convert_num_mode" => self::$CONVERT_NUM_MODE,
                "word_info" => self::$WORD_INFO,
            ];

        // Optional hot-word table
        if (self::$HOT_WORD_ID != "")
        {
            $httpUrlParams["hotword_id"] = self::$HOT_WORD_ID;
        }
        // Optional silence threshold: requires needvad = 1, range 150-2000, engine 8k_zh
        if (self::$VAD_SILENCE_TIME >= 150
            && self::$VAD_SILENCE_TIME <= 2000
            && $httpUrlParams["needvad"] == 1
            && $httpUrlParams["engine_model_type"] == "8k_zh")
        {
            $httpUrlParams["vad_silence_time"] = self::$VAD_SILENCE_TIME;
        }

        // Number of slices needed to cover the whole file
        $voicelen = strlen($voice_data);
        $whilenum = (int) ceil($voicelen / self::$CUTLENGTH);
        $requestBase = self::AGREEMENT."://".self::VOICE_URL.self::APPID."?";

        for ($voiceSeq = 0; $voiceSeq < $whilenum; $voiceSeq++) {
            $httpUrlParams["seq"] = $voiceSeq;
            // "end" is 1 only on the last slice
            $httpUrlParams["end"] = ($voiceSeq === $whilenum - 1) ? 1 : 0;

            $requestUrl = $requestBase . http_build_query($httpUrlParams);
            // The signature covers seq/end, so it is recomputed for every slice
            $sign = self::getAuthorizationString($httpUrlParams);

            // Slice the buffer already in memory instead of re-reading the file
            $sectionData = substr($voice_data, $voiceSeq * self::$CUTLENGTH, self::$CUTLENGTH);
            $headers = [
                'Authorization: ' . $sign,
                'Content-Length: ' . strlen($sectionData),
            ];
            $result = self::get_curl_request($requestUrl, $sectionData, 'POST', $headers);
            echo $result , "\n";
        }
    }

    /**
     * Performs one HTTP request with cURL.
     *
     * @param string       $url
     * @param array|string $param         POST body, or query parameters for non-POST requests
     * @param string       $mothod        'POST' (case-insensitive) sends $param as the body;
     *                                    anything else appends it to the URL
     * @param array        $headers       raw header lines
     * @param int          $return_status when 1, return [body, curl_getinfo() array]
     * @param int          $flag          when truthy, disable TLS peer verification
     * @return array|bool|string response body (false on cURL failure), or [body, info]
     */
    static private function get_curl_request($url, $param, $mothod = 'POST', $headers = [], $return_status = 0, $flag = 0)
    {
        $ch = curl_init();
        // Verification is only switched off when the caller asks for it. The
        // original condition was inverted and disabled it by default.
        if ($flag) {
            curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
        }
        curl_setopt($ch, CURLOPT_TIMEOUT, 6);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        if (strtolower($mothod) == 'post') {
            curl_setopt($ch, CURLOPT_POST, true);
            curl_setopt($ch, CURLOPT_POSTFIELDS, $param);
        } else {
            $url = $url . "?" . http_build_query($param);
        }
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
        curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
        // Proxy (uncomment to use)
        // curl_setopt($ch, CURLOPT_PROXY, "127.0.0.1");
        // curl_setopt($ch, CURLOPT_PROXYPORT, "12639");
        $ret = curl_exec($ch);
        $code = curl_getinfo($ch);
        curl_close($ch);
        if ($return_status == "1") {
            return array($ret, $code);
        }
        return $ret;
    }

    /**
     * Generates a random string.
     *
     * @param int  $len     number of characters to generate
     * @param bool $special also draw from a set of punctuation characters
     * @return string
     */
    private static function getRandomString($len, $special=false){
        $alphabet = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789';
        if($special){
            $alphabet .= '!@#$?|{/:;%^&*()-_[]}<>~ =,.';
        }
        $maxIndex = strlen($alphabet) - 1;
        $str = '';
        for($i = 0; $i < $len; $i++){
            $str .= $alphabet[mt_rand(0, $maxIndex)]; // pick one character at random
        }
        return $str;
    }

    /**
     * Builds the request signature.
     *
     * The string to sign is method + host/path + appid + "?" + the parameters
     * sorted by key and URL-encoded; it is HMAC-SHA1 signed with SECRET_KEY
     * and base64-encoded.
     *
     * @param array $params request parameters
     * @return string
     */
    private static function getAuthorizationString($params){
        $signString = self::HTTPRequestMethod.self::VOICE_URL.self::APPID."?";
        // Sort by key
        ksort($params, SORT_STRING);
        // appid is already in the path and must not appear in the query part
        unset($params["appid"]);
        $signString .= http_build_query($params);
        return base64_encode(hash_hmac('SHA1', $signString, self::SECRET_KEY, true));
    }
}
// Set parameters
// Send the request
voiceTest::voice("./test_wav/16k/16k_30s.wav");
WebSocket 协议代码如下(参考文档:https://cloud.tencent.com/document/product/1093/48982):
<?php
// Step 1: install the websocket package # https://github.com/Textalk/websocket-php
// composer require textalk/websocket
// Step 2: load the Composer autoloader
require_once "vendor/autoload.php";
// PHP example: request signing for real-time speech recognition
/**
 * Demo client for the real-time speech recognition WebSocket endpoint (asr/v2).
 *
 * Builds a signed wss:// URL, streams an audio file to it in fixed-size binary
 * frames and echoes every message received.
 */
class wsVoice{
    const APPID = "appid"; // must be configured
    const SECRET_ID = "秘钥id"; // must be configured
    const SECRET_KEY = "秘钥key"; // must be configured
    const AGREEMENT = "wss://";
    const VOICE_URL = "asr.cloud.tencent.com/asr/v2/";

    /**
     * Engine model type.
     *   Telephone scenarios:
     *     - 8k_zh:         8k Mandarin Chinese, general
     *     - 8k_zh_finance: 8k finance-domain model
     *   Non-telephone scenarios:
     *     - 16k_zh:    16k Mandarin Chinese, general
     *     - 16k_en:    16k English
     *     - 16k_ca:    16k Cantonese
     *     - 16k_ko:    16k Korean
     *     - 16k_zh-TW: 16k Mandarin, Traditional Chinese output
     *     - 16k_ja:    16k Japanese
     */
    static $ENGINE_MODEL_TYPE = '16k_zh';
    // Audio encoding: 1 = wav, 4 = sp, 6 = silk
    static $VOICE_FORMAT = 1;
    // Hot-word table id; sent as "hotword_id" only when non-empty
    static $HOT_WORD_ID = "";
    // VAD must be enabled when the stream is longer than 60 seconds. 0 = off, 1 = on.
    static $NEEDVAD = 0;
    // Silence threshold (ms) for sentence splitting. Only sent when needvad is 1,
    // the value is within 240-2000 and the engine model is 8k_zh, 8k_zh_finance or 16k_zh.
    static $VAD_SILENCE_TIME = 2000;
    // Post-processing parameters
    static $FILTER_DIRTY = 0;
    static $FILTER_MODAL = 0;
    static $FILTER_PUNC = 0;
    // Slice length in bytes (cutlength < 200000)
    static $CUTLENGTH = 6000;

    /**
     * Streams the audio file over a WebSocket connection and echoes the
     * signature, the request URL, the slice count and every server message.
     *
     * @param string $pathFile path of the audio file to recognise
     * @return void
     * @throws \InvalidArgumentException when self::$CUTLENGTH is not a positive number
     * @throws \RuntimeException when the audio file cannot be read
     */
    public function ws($pathFile = ""){
        if (self::$CUTLENGTH <= 0) {
            throw new \InvalidArgumentException("CUTLENGTH must be a positive number of bytes");
        }
        // Validate the input before any output or network activity.
        if (!is_file($pathFile) || !is_readable($pathFile)) {
            throw new \RuntimeException("Audio file is not readable: " . $pathFile);
        }
        $voice_data = file_get_contents($pathFile);
        if ($voice_data === false) {
            throw new \RuntimeException("Failed to read audio file: " . $pathFile);
        }

        $timestamp = time();
        $httpUrlParams =
            [
                "secretid" => self::SECRET_ID,
                "timestamp" => $timestamp,
                // Signature expiry: one day after the request timestamp
                "expired" => $timestamp + 24 * 60 * 60,
                "nonce" => rand(1, 100000), // random positive integer
                "engine_model_type" => self::$ENGINE_MODEL_TYPE,
                // 16-character string identifying this audio stream, generated by the caller
                "voice_id" => self::getRandomString(16),
                "voice_format" => self::$VOICE_FORMAT,
                "needvad" => self::$NEEDVAD,
                "filter_dirty" => self::$FILTER_DIRTY,
                "filter_modal" => self::$FILTER_MODAL,
                "filter_punc" => self::$FILTER_PUNC,
                "convert_num_mode" => 1,
                "word_info" => 0,
                // "hotword_id", "vad_silence_time" and "signature" are added below
            ];

        // Optional hot-word table
        if (self::$HOT_WORD_ID != "")
        {
            $httpUrlParams["hotword_id"] = self::$HOT_WORD_ID;
        }
        // Optional silence threshold: requires needvad = 1 and a supported engine
        if (self::$VAD_SILENCE_TIME >= 240
            && self::$VAD_SILENCE_TIME <= 2000
            && $httpUrlParams["needvad"] == 1
            && in_array($httpUrlParams["engine_model_type"], ["8k_zh", "8k_zh_finance", "16k_zh"], true))
        {
            $httpUrlParams["vad_silence_time"] = self::$VAD_SILENCE_TIME;
        }

        // Sign the parameters, then add the signature as one more parameter
        $signature = self::getAuthorizationString($httpUrlParams);
        echo $signature." \n";
        $httpUrlParams["signature"] = $signature;
        $requestUrl = self::AGREEMENT.self::VOICE_URL.self::APPID."?";
        $requestUrl .= http_build_query($httpUrlParams);
        echo $requestUrl. " \n";

        // Number of slices needed to cover the whole file
        $voicelen = strlen($voice_data);
        $whilenum = (int) ceil($voicelen / self::$CUTLENGTH);

        // Client class of the textalk/websocket package
        $client = new \WebSocket\Client($requestUrl);
        echo "总次数".$whilenum. " \n";
        for ($voiceSeq = 0; $voiceSeq < $whilenum; $voiceSeq++) {
            // Slice the buffer already in memory instead of re-reading the file
            $sectionData = substr($voice_data, $voiceSeq * self::$CUTLENGTH, self::$CUTLENGTH);
            $client->binary($sectionData);
            echo $client->receive();
            echo "\n";
        }
        echo "结束 \n";
        // A text frame tells the server the audio stream is complete
        $client->text('{"type": "end"}');
        echo $client->receive();
        $client->close();
    }

    /**
     * Builds the request signature.
     *
     * The string to sign is host/path + appid + "?" + the parameters sorted by
     * key and URL-encoded; it is HMAC-SHA1 signed with SECRET_KEY and
     * base64-encoded.
     *
     * @param array $params request parameters
     * @return string
     */
    private static function getAuthorizationString($params){
        $signString = self::VOICE_URL.self::APPID."?";
        // Sort by key
        ksort($params, SORT_STRING);
        $signString .= http_build_query($params);
        return base64_encode(hash_hmac('SHA1', $signString, self::SECRET_KEY, true));
    }

    /**
     * Generates a random string.
     *
     * @param int  $len     number of characters to generate
     * @param bool $special also draw from a set of punctuation characters
     * @return string
     */
    private static function getRandomString($len, $special=false){
        $alphabet = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789';
        if($special){
            $alphabet .= '!@#$?|{/:;%^&*()-_[]}<>~ =,.';
        }
        $maxIndex = strlen($alphabet) - 1;
        $str = '';
        for($i = 0; $i < $len; $i++){
            $str .= $alphabet[mt_rand(0, $maxIndex)]; // pick one character at random
        }
        return $str;
    }
}
// Run the WebSocket demo against a local PCM sample file.
$model = new wsVoice();
$model->ws("./test.pcm");