Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
A
amos-boot-biz
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
项目统一框架
amos-boot-biz
Commits
b9021a53
Commit
b9021a53
authored
Jun 24, 2022
by
chenzhao
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
语音融合测试文件
parent
f6cf990a
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
230 additions
and
0 deletions
+230
-0
SpeechTranscriberDemo.java
...odule/jcs/biz/audioToText/util/SpeechTranscriberDemo.java
+230
-0
No files found.
amos-boot-module/amos-boot-module-biz/amos-boot-module-jcs-biz/src/main/java/com/yeejoin/amos/boot/module/jcs/biz/audioToText/util/SpeechTranscriberDemo.java
0 → 100644
View file @
b9021a53
package
com
.
yeejoin
.
amos
.
boot
.
module
.
jcs
.
biz
.
audioToText
.
util
;
import
java.io.File
;
import
java.io.FileInputStream
;
import
java.io.IOException
;
import
java.net.DatagramPacket
;
import
java.net.DatagramSocket
;
import
java.net.InetAddress
;
import
com.alibaba.nls.client.AccessToken
;
import
com.alibaba.nls.client.protocol.InputFormatEnum
;
import
com.alibaba.nls.client.protocol.NlsClient
;
import
com.alibaba.nls.client.protocol.SampleRateEnum
;
import
com.alibaba.nls.client.protocol.asr.SpeechTranscriber
;
import
com.alibaba.nls.client.protocol.asr.SpeechTranscriberListener
;
import
com.alibaba.nls.client.protocol.asr.SpeechTranscriberResponse
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
/**
 * Demonstrates the Alibaba Cloud NLS real-time speech transcription (ASR) API:
 * <ul>
 *   <li>obtaining an access token dynamically,</li>
 *   <li>streaming a local PCM file as if it were a live audio stream,</li>
 *   <li>measuring how long the service takes to finish after the last chunk is sent.</li>
 * </ul>
 *
 * <p>Each audio chunk is additionally mirrored as a UDP datagram to the local host
 * on port {@value #UDP_MIRROR_PORT}.
 */
public class SpeechTranscriberDemo {

    private static final Logger logger = LoggerFactory.getLogger(SpeechTranscriberDemo.class);

    /** Number of bytes read from the audio file (and forwarded) per iteration. */
    private static final int CHUNK_SIZE = 332;

    /** Local UDP port every audio chunk is mirrored to. */
    private static final int UDP_MIRROR_PORT = 25006;

    /** Sample rate, in Hz, used to pace the simulated stream; matches SAMPLE_RATE_16K below. */
    private static final int SAMPLE_RATE_HZ = 16000;

    /** Gateway used by {@link #main} when no URL argument is supplied. */
    private static final String DEFAULT_GATEWAY_URL = "wss://nls-gateway.cn-shanghai.aliyuncs.com/ws/v1";

    private final String appKey;

    /** Remains {@code null} when the access token could not be obtained. */
    private final NlsClient client;

    /**
     * Requests an access token and creates the NLS client.
     *
     * <p>If the token request fails with an {@link IOException} the error is logged and the
     * instance is left without a client; {@link #process(String)} and {@link #shutdown()} then
     * do nothing besides logging.
     *
     * @param appKey application key sent with every transcription request
     * @param id     AccessKey id used to request the token
     * @param secret AccessKey secret used to request the token
     * @param url    gateway URL; {@code null} or empty selects the SDK's default address
     */
    public SpeechTranscriberDemo(String appKey, String id, String secret, String url) {
        this.appKey = appKey;
        NlsClient created = null;
        // One NlsClient should be created per application. In real use, request a new
        // token before accessToken.getExpireTime() is reached.
        AccessToken accessToken = new AccessToken(id, secret);
        try {
            accessToken.apply();
            System.out.println("get token: " + ", expire time: " + accessToken.getExpireTime());
            if (url == null || url.isEmpty()) {
                created = new NlsClient(accessToken.getToken());
            } else {
                created = new NlsClient(url, accessToken.getToken());
            }
        } catch (IOException e) {
            logger.error("Failed to obtain NLS access token; no client was created", e);
        }
        this.client = created;
    }

    /** Formats the fields that most callbacks print: task id, event name and status. */
    private static String describe(SpeechTranscriberResponse response) {
        // task_id uniquely identifies the exchange between caller and server;
        // it has to be provided when reporting a problem.
        return "task_id: " + response.getTaskId()
                + ", name: " + response.getName()
                + ", status: " + response.getStatus();
    }

    /** Builds a listener that prints every transcription event to standard output. */
    private static SpeechTranscriberListener getTranscriberListener() {
        return new SpeechTranscriberListener() {
            // Intermediate result; only delivered when setEnableIntermediateResult is true.
            @Override
            public void onTranscriptionResultChange(SpeechTranscriberResponse response) {
                System.out.println(describe(response)
                        // Status code "20000000" means normal recognition.
                        // Sentence number, increasing from 1.
                        + ", index: " + response.getTransSentenceIndex()
                        // Current recognition result.
                        + ", result: " + response.getTransSentenceText()
                        // Duration of audio processed so far, in milliseconds.
                        + ", time: " + response.getTransSentenceTime());
            }

            @Override
            public void onTranscriberStart(SpeechTranscriberResponse response) {
                System.out.println(describe(response));
            }

            @Override
            public void onSentenceBegin(SpeechTranscriberResponse response) {
                System.out.println(describe(response));
            }

            // A full sentence was recognized. The server segments sentences itself and
            // sends this message when it detects the end of one.
            @Override
            public void onSentenceEnd(SpeechTranscriberResponse response) {
                System.out.println(describe(response)
                        // Sentence number, increasing from 1.
                        + ", index: " + response.getTransSentenceIndex()
                        // Current recognition result.
                        + ", result: " + response.getTransSentenceText()
                        // Confidence.
                        + ", confidence: " + response.getConfidence()
                        // Sentence start time.
                        + ", begin_time: " + response.getSentenceBeginTime()
                        // Duration of audio processed so far, in milliseconds.
                        + ", time: " + response.getTransSentenceTime());
            }

            // Transcription finished.
            @Override
            public void onTranscriptionComplete(SpeechTranscriberResponse response) {
                System.out.println(describe(response));
            }

            @Override
            public void onFail(SpeechTranscriberResponse response) {
                System.out.println("task_id: " + response.getTaskId()
                        + ", status: " + response.getStatus()
                        + ", status_text: " + response.getStatusText());
            }
        };
    }

    /**
     * Computes the playback duration that corresponds to a chunk of binary audio data,
     * assuming 16-bit, single-channel samples.
     *
     * <p>The arithmetic is done in {@code long} so large chunks cannot overflow
     * (the {@code int} product {@code dataSize * 10 * 8000} overflows above 26843 bytes).
     *
     * @param dataSize   number of audio bytes
     * @param sampleRate sample rate in Hz; 8000 and 16000 are supported
     * @return the equivalent audio duration in milliseconds
     * @throws ArithmeticException if {@code sampleRate} is 0
     */
    public static int getSleepDelta(int dataSize, int sampleRate) {
        long numerator = (long) dataSize * 10 * 8000;
        long denominator = 160L * sampleRate;
        // For the supported sample rates the quotient never exceeds dataSize, so it fits an int.
        return (int) (numerator / denominator);
    }

    /**
     * Streams the given PCM file to the transcription service, pacing the chunks so the
     * upload approximates a real-time stream, and mirrors each chunk over UDP.
     *
     * <p>Failures are logged, not rethrown. The transcriber is always closed.
     *
     * @param filepath path of the 16 kHz PCM file to transcribe
     */
    public void process(String filepath) {
        if (client == null) {
            logger.error("NlsClient is not initialized; cannot transcribe {}", filepath);
            return;
        }
        SpeechTranscriber transcriber = null;
        try {
            // Create the instance and establish the connection.
            transcriber = new SpeechTranscriber(client, getTranscriberListener());
            transcriber.setAppKey(appKey);
            // Input audio encoding.
            transcriber.setFormat(InputFormatEnum.PCM);
            // Input audio sample rate.
            transcriber.setSampleRate(SampleRateEnum.SAMPLE_RATE_16K);
            // Whether to return intermediate recognition results.
            transcriber.setEnableIntermediateResult(false);
            // Whether to generate and return punctuation.
            transcriber.setEnablePunctuation(true);
            // Whether to normalize the result text (inverse text normalization).
            transcriber.setEnableITN(false);
            // Whether to ignore the single-sentence timeout.
            transcriber.addCustomedParam("enable_ignore_sentence_timeout", false);
            // Serializes the settings above to JSON, sends them to the server and
            // waits for the server's confirmation.
            transcriber.start();

            try (FileInputStream fis = new FileInputStream(new File(filepath));
                 DatagramSocket datagramSocket = new DatagramSocket()) {
                InetAddress localHost = InetAddress.getLocalHost();
                byte[] b = new byte[CHUNK_SIZE];
                int len;
                while ((len = fis.read(b)) > 0) {
                    // Send only the bytes actually read, so a short final chunk carries no stale data.
                    datagramSocket.send(new DatagramPacket(b, len, localHost, UDP_MIRROR_PORT));
                    transcriber.send(b, len);
                    // A local file is read far faster than real time, so sleep for the chunk's
                    // playback duration. A genuinely live stream would not need this.
                    Thread.sleep(getSleepDelta(len, SAMPLE_RATE_HZ));
                }
            }

            // Tell the server all audio has been sent and wait for it to finish.
            long now = System.currentTimeMillis();
            logger.info("ASR wait for complete");
            transcriber.stop();
            logger.info("ASR latency : {} ms", System.currentTimeMillis() - now);
        } catch (InterruptedException e) {
            // Restore the flag so callers can observe the interruption.
            Thread.currentThread().interrupt();
            logger.error("Interrupted while streaming audio from {}", filepath, e);
        } catch (Exception e) {
            logger.error("Speech transcription failed for {}", filepath, e);
        } finally {
            if (transcriber != null) {
                transcriber.close();
            }
        }
    }

    /** Shuts down the underlying NLS client, if one was created. */
    public void shutdown() {
        if (client != null) {
            client.shutdown();
        }
    }

    /**
     * Command-line entry point.
     *
     * <p>Usage: {@code SpeechTranscriberDemo <pcm-file> [url]}. Credentials are deliberately
     * not embedded in the source; they are read from the environment variables
     * {@code NLS_APP_KEY}, {@code ALIYUN_AK_ID} and {@code ALIYUN_AK_SECRET}.
     *
     * @param args the PCM file to stream, optionally followed by the gateway URL
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 1 || args.length > 2) {
            System.err.println("run error, need params(url is optional): <pcm-file> [url]");
            System.exit(-1);
        }
        String appKey = System.getenv("NLS_APP_KEY");
        String id = System.getenv("ALIYUN_AK_ID");
        String secret = System.getenv("ALIYUN_AK_SECRET");
        if (appKey == null || id == null || secret == null) {
            System.err.println("run error, need environment variables: "
                    + "NLS_APP_KEY, ALIYUN_AK_ID, ALIYUN_AK_SECRET");
            System.exit(-1);
        }
        // This demo simulates a live stream from a local file. In real use, capture or
        // receive the audio stream and forward it to the ASR service as it arrives.
        String filepath = args[0];
        String url = args.length == 2 ? args[1] : DEFAULT_GATEWAY_URL;

        SpeechTranscriberDemo demo = new SpeechTranscriberDemo(appKey, id, secret, url);
        try {
            demo.process(filepath);
        } finally {
            demo.shutdown();
        }
    }
}
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment