Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
A
amos-boot-biz
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
项目统一框架
amos-boot-biz
Commits
5ebb50c4
Commit
5ebb50c4
authored
Dec 01, 2021
by
helinlin
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
添加阿里云音频识别接口
parent
d82751ae
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
410 additions
and
76 deletions
+410
-76
AudioToText.java
...oin/amos/boot/module/jcs/biz/audioToText/AudioToText.java
+0
-0
SocketClient.java
...in/amos/boot/module/jcs/biz/audioToText/SocketClient.java
+54
-0
SpeechTranscriberListener.java
...module/jcs/biz/audioToText/SpeechTranscriberListener.java
+195
-0
Audio2TextController.java
.../boot/module/jcs/biz/controller/Audio2TextController.java
+70
-0
AppSpeechTranscriber.java
...in/java/com/yeejoin/amos/speech/AppSpeechTranscriber.java
+75
-60
SpeechApplicationTests.java
...src/test/java/com/yejoin/amos/SpeechApplicationTests.java
+16
-16
No files found.
amos-boot-module/amos-boot-module-biz/amos-boot-module-jcs-biz/src/main/java/com/yeejoin/amos/boot/module/jcs/biz/audioToText/AudioToText.java
View file @
5ebb50c4
This diff is collapsed.
Click to expand it.
amos-boot-module/amos-boot-module-biz/amos-boot-module-jcs-biz/src/main/java/com/yeejoin/amos/boot/module/jcs/biz/audioToText/SocketClient.java
0 → 100644
View file @
5ebb50c4
package
com
.
yeejoin
.
amos
.
boot
.
module
.
jcs
.
biz
.
audioToText
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
org.springframework.scheduling.annotation.Async
;
import
org.springframework.stereotype.Component
;
import
java.io.File
;
import
java.io.FileInputStream
;
import
java.io.OutputStream
;
import
java.net.InetSocketAddress
;
import
java.net.Socket
;
import
java.util.concurrent.TimeUnit
;
/**
 * Test client that simulates an audio source by streaming a local PCM file
 * to a socket listening on the local machine.
 */
@Component
public class SocketClient {

    private static final Logger logger = LoggerFactory.getLogger(SocketClient.class);

    /** Candidate test files; {@code type} in {@link #process(int, int)} is an index into this array. */
    private static final String[] testFilePath = {
            "D:\\ffmpeg-4.4-full_build-shared\\bin\\out.pcm",
    };

    public static void main(String[] args) {
        SocketClient socketClient = new SocketClient();
        socketClient.process(0, 0);
    }

    /**
     * Connects to {@code 127.0.0.1:port} and sends the selected test file in
     * chunks of up to 4096 bytes, pausing 400 ms after each chunk.
     *
     * <p>Failures are logged and not propagated to the caller.
     *
     * @param port local port to connect to
     * @param type index of the test file to send; values outside the valid
     *             range are clamped to the nearest valid index
     */
    @Async
    public void process(int port, int type) {
        // Clamp into [0, length - 1]; the previous "type -= 1" only handled type == length.
        int fileIndex = Math.max(0, Math.min(type, testFilePath.length - 1));

        // try-with-resources guarantees the socket and both streams are released on every path.
        try (Socket socket = new Socket()) {
            socket.connect(new InetSocketAddress("127.0.0.1", port));
            try (OutputStream outputStream = socket.getOutputStream();
                 FileInputStream fis = new FileInputStream(new File(testFilePath[fileIndex]))) {
                byte[] buffer = new byte[4096];
                int len;
                while ((len = fis.read(buffer)) > 0) {
                    logger.info("send data pack length: " + len);
                    // Write only the bytes actually read; a short final read must not
                    // send stale data left in the buffer from the previous iteration.
                    outputStream.write(buffer, 0, len);
                    TimeUnit.MILLISECONDS.sleep(400);
                }
                outputStream.flush();
            }
        } catch (InterruptedException e) {
            // Restore the interrupt flag so the executing thread's owner can observe it.
            Thread.currentThread().interrupt();
            logger.error("sending audio data was interrupted", e);
        } catch (Exception e) {
            logger.error("failed to send audio data to port " + port, e);
        }
    }
}
amos-boot-module/amos-boot-module-biz/amos-boot-module-jcs-biz/src/main/java/com/yeejoin/amos/boot/module/jcs/biz/audioToText/SpeechTranscriberListener.java
0 → 100644
View file @
5ebb50c4
package
com
.
yeejoin
.
amos
.
boot
.
module
.
jcs
.
biz
.
audioToText
;
import
com.alibaba.nls.client.protocol.asr.SpeechTranscriberResponse
;
import
com.fasterxml.jackson.databind.ObjectMapper
;
import
com.yeejoin.amos.component.rule.config.RuleConfig
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
org.springframework.scheduling.annotation.Async
;
import
org.typroject.tyboot.component.emq.EmqKeeper
;
import
java.util.ArrayList
;
import
java.util.List
;
/**
 * Callback listener for real-time speech transcription.
 *
 * <p>Logs every callback and, for successful intermediate results, publishes
 * the recognised sentence and the extracted keywords through the MQTT client
 * obtained from {@link EmqKeeper}.
 */
public class SpeechTranscriberListener extends com.alibaba.nls.client.protocol.asr.SpeechTranscriberListener {

    private static final Logger logger = LoggerFactory.getLogger(SpeechTranscriberListener.class);

    public static final int RESULT_SUCCESS_CODE = 20000000;

    /** Shared JSON serializer; reused instead of allocating one per callback. */
    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

    // Sentences recognised so far, stored at position (sentence index - 1)
    List<AudioRecord> audioRecords = new ArrayList<>();

    // Keywords recognised so far
    AudioKeyWord audioKeyWord = new AudioKeyWord();

    // Source of the audio currently being recognised; used as the topic suffix
    private final String number;

    // MQTT client holder
    private final EmqKeeper emqKeeper;

    public SpeechTranscriberListener(String number, EmqKeeper emqKeeper) {
        this.number = number;
        this.emqKeeper = emqKeeper;
    }

    /**
     * Intermediate result returned while recognition is in progress. Only
     * delivered when setEnableIntermediateResult is true.
     */
    @Override
    public void onTranscriptionResultChange(SpeechTranscriberResponse response) {
        logger.warn("语音识别过程中返回的结果");
        logger.warn("task_id: " + response.getTaskId() +
                ", name: " + response.getName() +
                // Status code 20000000 means normal recognition.
                ", status: " + response.getStatus() +
                // Sentence number, increasing from 1.
                ", index: " + response.getTransSentenceIndex() +
                // Current recognition result.
                ", result: " + response.getTransSentenceText() +
                // Duration of audio processed so far, in milliseconds.
                ", time: " + response.getTransSentenceTime());

        if (response.getStatus() == RESULT_SUCCESS_CODE) {
            sendToMqtt(response, audioRecords, number);
            extractKeyWord(response, audioKeyWord, number);
        } else {
            logger.error("异常的相应结果,响应码:" + response.getStatus());
        }
    }

    /**
     * The server is ready to start recognition.
     */
    @Override
    public void onTranscriberStart(SpeechTranscriberResponse response) {
        logger.warn("服务端准备好了进行识别");
        logger.warn("task_id: " + response.getTaskId()
                + ", name: " + response.getName()
                + ", status: " + response.getStatus());
    }

    /**
     * The server detected the beginning of a sentence.
     */
    @Override
    public void onSentenceBegin(SpeechTranscriberResponse response) {
        logger.warn("服务端检测到了一句话的开始");
        logger.warn("task_id: " + response.getTaskId()
                + ", name: " + response.getName()
                + ", status: " + response.getStatus());
    }

    /**
     * The server detected the end of a sentence. The server segments
     * sentences itself and sends this message when one is complete.
     */
    @Override
    public void onSentenceEnd(SpeechTranscriberResponse response) {
        logger.warn("服务端检测到了一句话的结束");
        logger.warn("task_id: " + response.getTaskId() +
                ", name: " + response.getName() +
                // Status code 20000000 means normal recognition.
                ", status: " + response.getStatus() +
                // Sentence number, increasing from 1.
                ", index: " + response.getTransSentenceIndex() +
                // Current recognition result.
                ", result: " + response.getTransSentenceText() +
                // Confidence.
                ", confidence: " + response.getConfidence() +
                // Sentence start time.
                ", begin_time: " + response.getSentenceBeginTime() +
                // Duration of audio processed so far, in milliseconds.
                ", time: " + response.getTransSentenceTime());
    }

    /**
     * Final result returned after recognition has finished.
     */
    @Override
    public void onTranscriptionComplete(SpeechTranscriberResponse response) {
        logger.warn("识别结束后返回的最终结果");
        logger.warn("task_id: " + response.getTaskId()
                + ", name: " + response.getName()
                + ", status: " + response.getStatus()
                + ",result:" + response.getTransSentenceText());
    }

    /**
     * Failure handling.
     */
    @Override
    public void onFail(SpeechTranscriberResponse response) {
        logger.error("失败处理");
        logger.error("task_id: " + response.getTaskId()
                + ", status: " + response.getStatus()
                + ", status_text: " + response.getStatusText());
    }

    /**
     * Stores the recognised sentence in {@code audioRecords} and publishes it
     * as JSON to the topic {@code MessageType.RECORD.getName() + "_" + number}.
     *
     * <p>NOTE(review): {@code @Async} only takes effect when the call goes
     * through a Spring proxy. The call from
     * {@link #onTranscriptionResultChange} is a direct invocation on
     * {@code this}, so on that path this method runs synchronously on the
     * callback thread.
     *
     * @param response     recognition result for the sentence
     * @param audioRecords history of recognised sentences, updated in place
     * @param number       audio source identifier used as the topic suffix
     */
    @Async
    public void sendToMqtt(SpeechTranscriberResponse response, List<AudioRecord> audioRecords, String number) {
        AudioRecord audioRecord = new AudioRecord.AudioRecordBuilder()
                .type(response.getTransSentenceIndex() % 2) // TODO distinguish the speaker role
                .taskId(response.getTaskId())
                .name(response.getName())
                .status(response.getStatus())
                .index(response.getTransSentenceIndex())
                .message(response.getTransSentenceText())
                .confidence(response.getConfidence())
                .time(response.getTransSentenceTime())
                .build();

        // Sentence indices start at 1: append a new sentence, or overwrite the
        // slot of a sentence that is still being refined.
        int index = audioRecord.getIndex() - 1;
        if (index >= audioRecords.size()) {
            audioRecords.add(audioRecord);
        } else {
            audioRecords.set(index, audioRecord);
        }

        try {
            // Encode explicitly as UTF-8: the payload contains Chinese text and the
            // platform default charset differs between hosts.
            byte[] payload = OBJECT_MAPPER.writeValueAsString(audioRecord)
                    .getBytes(java.nio.charset.StandardCharsets.UTF_8);
            emqKeeper.getMqttClient().publish(
                    MessageType.RECORD.getName() + "_" + number,
                    payload,
                    RuleConfig.DEFAULT_QOS,
                    true);
        } catch (Exception e) {
            // Pass the exception to the logger so the stack trace ends up in the log.
            logger.error("发送音频识别结果消息异常,原因:" + e.getMessage(), e);
        }
    }

    /**
     * Searches the recognised text for every configured keyword, records the
     * text from each match to the end of the sentence, and publishes the
     * accumulated keyword values as JSON to the topic
     * {@code MessageType.KEYWORD.getName() + "_" + number}.
     *
     * <p>NOTE(review): {@code @Async} only takes effect when the call goes
     * through a Spring proxy. The call from
     * {@link #onTranscriptionResultChange} is a direct invocation on
     * {@code this}, so on that path this method runs synchronously on the
     * callback thread.
     *
     * @param response     recognition result for the sentence
     * @param audioKeyWord accumulated keyword results, updated in place
     * @param number       audio source identifier used as the topic suffix
     */
    @Async
    public void extractKeyWord(SpeechTranscriberResponse response, AudioKeyWord audioKeyWord, String number) {
        try {
            String result = response.getTransSentenceText();
            // Look for keywords
            for (MessageKeywords messageKeyword : MessageKeywords.values()) {
                for (String keyword : messageKeyword.getKeyword()) {
                    int index = result.indexOf(keyword);
                    // TODO for now the value is everything from the match to the end
                    if (index != -1) {
                        String keywordValue = result.substring(index);
                        audioKeyWord.getValues().get(messageKeyword.getType()).add(keywordValue);
                    }
                }
            }
            // Encode explicitly as UTF-8 (see sendToMqtt).
            byte[] bytes = OBJECT_MAPPER.writeValueAsString(audioKeyWord.getValues())
                    .getBytes(java.nio.charset.StandardCharsets.UTF_8);
            emqKeeper.getMqttClient().publish(
                    MessageType.KEYWORD.getName() + "_" + number,
                    bytes,
                    RuleConfig.DEFAULT_QOS,
                    true);
        } catch (Exception e) {
            // Pass the exception to the logger so the stack trace ends up in the log.
            logger.error("发送音频关键字消息异常,原因:" + e.getMessage(), e);
        }
    }
}
amos-boot-module/amos-boot-module-biz/amos-boot-module-jcs-biz/src/main/java/com/yeejoin/amos/boot/module/jcs/biz/controller/Audio2TextController.java
0 → 100644
View file @
5ebb50c4
package
com
.
yeejoin
.
amos
.
boot
.
module
.
jcs
.
biz
.
controller
;
import
com.yeejoin.amos.boot.module.jcs.biz.audioToText.AudioToText
;
import
com.yeejoin.amos.boot.module.jcs.biz.audioToText.SocketClient
;
import
io.swagger.annotations.Api
;
import
io.swagger.annotations.ApiOperation
;
import
org.springframework.beans.factory.annotation.Autowired
;
import
org.springframework.web.bind.annotation.GetMapping
;
import
org.springframework.web.bind.annotation.RequestMapping
;
import
org.springframework.web.bind.annotation.RequestParam
;
import
org.springframework.web.bind.annotation.RestController
;
import
org.typroject.tyboot.core.foundation.enumeration.UserType
;
import
org.typroject.tyboot.core.restful.doc.TycloudOperation
;
import
java.util.HashMap
;
@RestController
@Api(tags = "语音转文字Api")
@RequestMapping("/Audio2TextController")
public class Audio2TextController {

    @Autowired
    AudioToText audioToText;

    @Autowired
    SocketClient socketClient;

    /**
     * Speech-to-text test, step one: delegates to {@code AudioToText.convert}.
     *
     * @param myNumber     phone number of the current user
     * @param callerNumber second number passed through to the converter
     * @return the map returned by the converter
     */
    @TycloudOperation(ApiLevel = UserType.AGENCY)
    @GetMapping(value = "/startConvert")
    @ApiOperation(
            httpMethod = "GET",
            value = "测试语音转文字第一步",
            notes = "number为当前用户的手机号")
    public HashMap<String, Integer> startConvert(@RequestParam String myNumber,
                                                 @RequestParam String callerNumber) {
        return audioToText.convert(myNumber, callerNumber);
    }

    /**
     * Speech-to-text test, step two: asks the socket client to send test
     * audio to the given port.
     *
     * @param port port handed to the socket client
     * @param type index of the test audio; treated as 0 when absent
     * @return the literal string {@code "success"}
     */
    @TycloudOperation(ApiLevel = UserType.AGENCY)
    @GetMapping(value = "/startSendAudio")
    @ApiOperation(
            httpMethod = "GET",
            value = "测试语音转文字第二步",
            notes = "测试语音转文字第二步")
    public String startSendAudio(@RequestParam int port, Integer type) {
        int audioType = (type != null) ? type : 0;
        socketClient.process(port, audioType);
        return "success";
    }

    /**
     * Callback endpoint for a monitored phone call; the body is currently empty.
     *
     * <p>Intended flow, as described by the original author:
     * <ol>
     *   <li>On receiving the conversion request, start two server sockets
     *       listening on different ports.</li>
     *   <li>Call the voice fusion system API, passing the two listening ports
     *       and the local IP address.</li>
     *   <li>When a server socket receives data, push it to the Alibaba Cloud
     *       speech recognition system.</li>
     *   <li>Take the recognition result in the callback and push it to the
     *       MQTT server with the MQTT client.</li>
     *   <li>The front end subscribes to the messages and displays them.</li>
     * </ol>
     *
     * @param cid     call id
     * @param myPhone phone number of the current user
     * @param caller  phone number of the incoming caller
     */
    @TycloudOperation(ApiLevel = UserType.AGENCY)
    @GetMapping(value = "/startConvertText")
    @ApiOperation(
            httpMethod = "GET",
            value = "静听电话回调后端开始转文字",
            notes = "静听电话回调后端开始转文字")
    public void startConvertText(String cid, String myPhone, String caller) {
    }
}
amos-boot-utils/amos-boot-utils-speech/src/main/java/com/yeejoin/amos/speech/SpeechTranscriber.java
→
amos-boot-utils/amos-boot-utils-speech/src/main/java/com/yeejoin/amos/speech/
App
SpeechTranscriber.java
View file @
5ebb50c4
...
...
@@ -2,13 +2,15 @@ package com.yeejoin.amos.speech;
import
com.alibaba.nls.client.protocol.InputFormatEnum
;
import
com.alibaba.nls.client.protocol.SampleRateEnum
;
import
com.alibaba.nls.client.protocol.asr.SpeechTranscriber
;
import
com.alibaba.nls.client.protocol.asr.SpeechTranscriberListener
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
java.io.File
;
import
java.io.FileInputStream
;
import
java.util.concurrent.TimeUnit
;
import
java.io.IOException
;
import
java.io.InputStream
;
import
java.net.ServerSocket
;
import
java.net.Socket
;
/**
...
...
@@ -17,85 +19,98 @@ import java.util.concurrent.TimeUnit;
* 支持的输入格式:PCM(无压缩的PCM或WAV文件)、16 bit采样位数、单声道(mono)。
* 支持的音频采样率:8000 Hz和16000 Hz。
*/
public
class
SpeechTranscriber
{
public
class
App
SpeechTranscriber
{
private
static
final
Logger
logger
=
LoggerFactory
.
getLogger
(
SpeechTranscriber
.
class
);
//语音识别文件
private
final
File
speechFile
;
//识别回调函数
private
static
final
Logger
logger
=
LoggerFactory
.
getLogger
(
AppSpeechTranscriber
.
class
);
private
final
SpeechTranscriberListener
listener
;
private
final
ServerSocket
serverSocket
;
public
SpeechTranscriber
(
File
speechFile
,
SpeechTranscriberListener
listener
)
{
if
(
speechFile
!=
null
&&
speechFile
.
exists
()
&&
speechFile
.
isFile
())
{
this
.
speechFile
=
speechFile
;
this
.
listener
=
listener
;
}
else
{
throw
new
IllegalArgumentException
(
"待识别的文件存在异常"
);
}
public
AppSpeechTranscriber
(
SpeechTranscriberListener
listener
,
ServerSocket
serverSocket
)
{
this
.
listener
=
listener
;
this
.
serverSocket
=
serverSocket
;
}
/**
* 开始语音识别
*/
public
void
process
()
{
com
.
alibaba
.
nls
.
client
.
protocol
.
asr
.
SpeechTranscriber
transcriber
=
null
;
SpeechTranscriber
transcriber
=
null
;
try
{
//启动ServerSocket等待接收音频数据,只接受一次请求
logger
.
warn
(
"serverSocket已启动,地址:"
+
serverSocket
.
getInetAddress
().
getHostAddress
()
+
"监听端口:"
+
serverSocket
.
getLocalPort
()
+
" 等待语音融合系统推送数据..."
);
Socket
socket
=
serverSocket
.
accept
();
logger
.
warn
(
"收到用户连接请求,开始读取数据"
);
//创建实例、建立连接。
transcriber
=
new
com
.
alibaba
.
nls
.
client
.
protocol
.
asr
.
SpeechTranscriber
(
AppNslClient
.
instance
(),
listener
);
transcriber
.
setAppKey
(
SpeechConfig
.
AppKey
);
//输入音频编码方式。
transcriber
.
setFormat
(
InputFormatEnum
.
PCM
);
//输入音频采样率。
transcriber
.
setSampleRate
(
SampleRateEnum
.
SAMPLE_RATE_16K
);
//是否返回中间识别结果。
transcriber
.
setEnableIntermediateResult
(
true
);
//是否生成并返回标点符号。
transcriber
.
setEnablePunctuation
(
true
);
//是否将返回结果规整化,比如将一百返回为100。
transcriber
.
setEnableITN
(
true
);
//设置vad断句参数。默认值:800ms,有效值:200ms~2000ms。
transcriber
.
addCustomedParam
(
"max_sentence_silence"
,
500
);
//设置是否语义断句。
//transcriber.addCustomedParam("enable_semantic_sentence_detection",false);
//设置是否开启顺滑。
//transcriber.addCustomedParam("disfluency",true);
//设置是否开启词模式。
transcriber
.
addCustomedParam
(
"enable_words"
,
true
);
//设置vad噪音阈值参数,参数取值为-1~+1,如-0.9、-0.8、0.2、0.9。
//取值越趋于-1,判定为语音的概率越大,亦即有可能更多噪声被当成语音被误识别。
//取值越趋于+1,判定为噪音的越多,亦即有可能更多语音段被当成噪音被拒绝识别。
//该参数属高级参数,调整需慎重和重点测试。
//transcriber.addCustomedParam("speech_noise_threshold",0.3);
//设置训练后的定制语言模型id。
//transcriber.addCustomedParam("customization_id","你的定制语言模型id");
//设置训练后的定制热词id。
//transcriber.addCustomedParam("vocabulary_id","你的定制热词id");
//设置是否忽略单句超时。
transcriber
.
addCustomedParam
(
"enable_ignore_sentence_timeout"
,
false
);
//vad断句开启后处理。
//transcriber.addCustomedParam("enable_vad_unify_post",false);
//此方法将以上参数设置序列化为JSON发送给服务端,并等待服务端确认。
transcriber
=
new
SpeechTranscriber
(
AppNslClient
.
instance
(),
listener
);
//设置识别参数
setParam
(
transcriber
);
//开始任务
transcriber
.
start
();
FileInputStream
fis
=
new
FileInputStream
(
speechFile
);
byte
[]
b
=
new
byte
[
3200
];
InputStream
inputStream
=
socket
.
getInputStream
(
);
byte
[]
b
=
new
byte
[
4096
];
int
len
;
while
((
len
=
fis
.
read
(
b
))
>
0
)
{
logger
.
info
(
"
send
data pack length: "
+
len
);
while
((
len
=
inputStream
.
read
(
b
))
>
0
)
{
logger
.
info
(
"
receive
data pack length: "
+
len
);
transcriber
.
send
(
b
,
len
);
//本案例用读取本地文件的形式模拟实时获取语音流并发送的,因为读取速度较快,这里需要设置sleep,如果实时获取语音则无需设置sleep
TimeUnit
.
MILLISECONDS
.
sleep
(
400
);
}
//通知服务端语音数据发送完毕,等待服务端处理完成。
long
now
=
System
.
currentTimeMillis
();
logger
.
info
(
"ASR wait for complete"
);
socket
.
close
();
//结束任务
transcriber
.
stop
();
logger
.
info
(
"ASR latency : "
+
(
System
.
currentTimeMillis
()
-
now
)
+
" ms
"
);
logger
.
warn
(
"语音转文字已结束
"
);
}
catch
(
Exception
e
)
{
logger
.
error
(
e
.
getMessage
());
}
finally
{
if
(
null
!=
transcriber
)
{
transcriber
.
close
();
}
if
(!
serverSocket
.
isClosed
())
{
try
{
serverSocket
.
close
();
}
catch
(
IOException
exception
)
{
exception
.
printStackTrace
();
logger
.
error
(
exception
.
getMessage
());
}
}
}
}
/**
* 设置识别参数
*/
private
void
setParam
(
SpeechTranscriber
transcriber
)
{
transcriber
.
setAppKey
(
SpeechConfig
.
AppKey
);
//输入音频编码方式。
transcriber
.
setFormat
(
InputFormatEnum
.
PCM
);
//输入音频采样率。
transcriber
.
setSampleRate
(
SampleRateEnum
.
SAMPLE_RATE_16K
);
//是否返回中间识别结果。
transcriber
.
setEnableIntermediateResult
(
true
);
//是否生成并返回标点符号。
transcriber
.
setEnablePunctuation
(
true
);
//是否将返回结果规整化,比如将一百返回为100。
transcriber
.
setEnableITN
(
true
);
//设置vad断句参数。默认值:800ms,有效值:200ms~2000ms。
transcriber
.
addCustomedParam
(
"max_sentence_silence"
,
500
);
//设置是否语义断句。
//transcriber.addCustomedParam("enable_semantic_sentence_detection",false);
//设置是否开启顺滑。
//transcriber.addCustomedParam("disfluency",true);
//设置是否开启词模式。
transcriber
.
addCustomedParam
(
"enable_words"
,
true
);
//设置vad噪音阈值参数,参数取值为-1~+1,如-0.9、-0.8、0.2、0.9。
//取值越趋于-1,判定为语音的概率越大,亦即有可能更多噪声被当成语音被误识别。
//取值越趋于+1,判定为噪音的越多,亦即有可能更多语音段被当成噪音被拒绝识别。
//该参数属高级参数,调整需慎重和重点测试。
//transcriber.addCustomedParam("speech_noise_threshold",0.3);
//设置训练后的定制语言模型id。
//transcriber.addCustomedParam("customization_id","你的定制语言模型id");
//设置训练后的定制热词id。
//transcriber.addCustomedParam("vocabulary_id","你的定制热词id");
//设置是否忽略单句超时。
transcriber
.
addCustomedParam
(
"enable_ignore_sentence_timeout"
,
false
);
//vad断句开启后处理。
//transcriber.addCustomedParam("enable_vad_unify_post",false);
//此方法将以上参数设置序列化为JSON发送给服务端,并等待服务端确认。
}
}
amos-boot-utils/amos-boot-utils-speech/src/test/java/com/yejoin/amos/SpeechApplicationTests.java
View file @
5ebb50c4
...
...
@@ -4,7 +4,7 @@ import com.alibaba.nls.client.protocol.asr.SpeechTranscriberListener;
import
com.alibaba.nls.client.protocol.asr.SpeechTranscriberResponse
;
import
com.yeejoin.amos.speech.SpeechFlashRecognizer
;
import
com.yeejoin.amos.speech.SpeechRecognizer
;
import
com.yeejoin.amos.speech.SpeechTranscriber
;
import
com.yeejoin.amos.speech.
App
SpeechTranscriber
;
import
okhttp3.Call
;
import
okhttp3.Callback
;
import
okhttp3.Response
;
...
...
@@ -23,14 +23,14 @@ public class SpeechApplicationTests {
*/
@Test
void
testSpeechTranscriber
()
{
//本案例使用本地文件模拟发送实时流数据。您在实际使用时,可以实时采集或接收语音流并发送到ASR服务端。
/*
//本案例使用本地文件模拟发送实时流数据。您在实际使用时,可以实时采集或接收语音流并发送到ASR服务端。
String fileLink = "https://gw.alipayobjects.com/os/bmw-prod/0574ee2e-f494-45a5-820f-63aee583045a.wav";
//将上面fileLink文件下载到本地后,替换filepath为本地地址测试
String filepath = "D:\\ffmpeg-4.4-full_build-shared\\bin\\out.pcm";
SpeechTranscriber
transcriber
=
new
SpeechTranscriber
(
new
File
(
filepath
),
new
SpeechTranscriberListener
()
{
/**
AppSpeechTranscriber transcriber = new AppSpeechTranscriber(
new SpeechTranscriberListener() {
*/
/**
* 语音识别过程中返回的结果。仅当setEnableIntermediateResult为true时,才会返回该消息。
*/
*/
/*
@Override
public void onTranscriptionResultChange(SpeechTranscriberResponse response) {
logger.warn("语音识别过程中返回的结果");
...
...
@@ -46,9 +46,9 @@ public class SpeechApplicationTests {
", time: " + response.getTransSentenceTime());
}
/**
*/
/**
* 服务端准备好了进行识别
*/
*/
/*
@Override
public void onTranscriberStart(SpeechTranscriberResponse response) {
logger.warn("服务端准备好了进行识别");
...
...
@@ -57,9 +57,9 @@ public class SpeechApplicationTests {
+ ", status: " + response.getStatus());
}
/**
*/
/**
* 服务端检测到了一句话的开始
*/
*/
/*
@Override
public void onSentenceBegin(SpeechTranscriberResponse response) {
logger.warn("服务端检测到了一句话的开始");
...
...
@@ -69,10 +69,10 @@ public class SpeechApplicationTests {
}
/**
*/
/**
* 服务端检测到了一句话的结束
* 识别出一句话。服务端会智能断句,当识别到一句话结束时会返回此消息。
*/
*/
/*
@Override
public void onSentenceEnd(SpeechTranscriberResponse response) {
logger.warn("服务端检测到了一句话的结束");
...
...
@@ -92,9 +92,9 @@ public class SpeechApplicationTests {
", time: " + response.getTransSentenceTime());
}
/**
*/
/**
* 识别结束后返回的最终结果
*/
*/
/*
@Override
public void onTranscriptionComplete(SpeechTranscriberResponse response) {
logger.warn("识别结束后返回的最终结果");
...
...
@@ -104,9 +104,9 @@ public class SpeechApplicationTests {
+ ",result:" + response.getTransSentenceText());
}
/**
*/
/**
* 失败处理
*/
*/
/*
@Override
public void onFail(SpeechTranscriberResponse response) {
logger.error("失败处理");
...
...
@@ -115,7 +115,7 @@ public class SpeechApplicationTests {
+ ", status_text: " + response.getStatusText());
}
});
transcriber
.
process
();
transcriber.process();
*/
}
/**
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment