Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
A
amos-boot-biz
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
项目统一框架
amos-boot-biz
Commits
54766f8a
Commit
54766f8a
authored
Nov 30, 2021
by
helinlin
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
添加阿里云音频识别demo
parent
74ea674a
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
676 additions
and
16 deletions
+676
-16
pom.xml
amos-boot-utils/amos-boot-utils-speech/pom.xml
+39
-0
SpeechApplication.java
...ech/src/main/java/com/yeejoin/amos/SpeechApplication.java
+13
-0
HttpUtil.java
.../src/main/java/com/yeejoin/amos/speech/util/HttpUtil.java
+61
-0
SpeechFlashRecognizerDemo.java
...m/yeejoin/amos/speech/util/SpeechFlashRecognizerDemo.java
+78
-0
SpeechRecognizerDemo.java
...va/com/yeejoin/amos/speech/util/SpeechRecognizerDemo.java
+204
-0
SpeechTranscriberDemo.java
...a/com/yeejoin/amos/speech/util/SpeechTranscriberDemo.java
+217
-0
SpeechUtil.java
...rc/main/java/com/yeejoin/amos/speech/util/SpeechUtil.java
+45
-0
application.properties
...ot-utils-speech/src/main/resources/application.properties
+0
-0
pom.xml
amos-boot-utils/pom.xml
+19
-16
No files found.
amos-boot-utils/amos-boot-utils-speech/pom.xml
0 → 100644
View file @
54766f8a
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven module for the Alibaba Cloud speech-recognition demos. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <parent>
        <artifactId>amos-boot-utils</artifactId>
        <groupId>com.amosframework.boot</groupId>
        <version>1.0.0</version>
    </parent>

    <artifactId>amos-boot-utils-speech</artifactId>

    <dependencies>
        <dependency>
            <groupId>com.amosframework.boot</groupId>
            <artifactId>amos-boot-core</artifactId>
            <version>${amos-biz-boot.version}</version>
        </dependency>
        <!-- JSON handling for the file-transcription REST responses.
             1.2.83 is the release that fixes the autoType deserialization bypass
             (CVE-2022-25845) present in 1.2.80 and earlier. -->
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.83</version>
        </dependency>
        <!-- Alibaba Cloud core SDK (signed CommonRequest calls). -->
        <dependency>
            <groupId>com.aliyun</groupId>
            <artifactId>aliyun-java-sdk-core</artifactId>
            <version>3.7.1</version>
        </dependency>
        <!-- Alibaba Cloud NLS SDK for real-time transcription. -->
        <dependency>
            <groupId>com.alibaba.nls</groupId>
            <artifactId>nls-sdk-transcriber</artifactId>
            <version>2.2.1</version>
        </dependency>
    </dependencies>
</project>
amos-boot-utils/amos-boot-utils-speech/src/main/java/com/yeejoin/amos/SpeechApplication.java
0 → 100644
View file @
54766f8a
package
com
.
yeejoin
.
amos
;
import
org.springframework.boot.SpringApplication
;
import
org.springframework.boot.autoconfigure.SpringBootApplication
;
/**
 * Spring Boot entry point of the speech utility module.
 */
@SpringBootApplication
public class SpeechApplication {

    /**
     * Starts the Spring application using this class as the primary source.
     *
     * @param args command-line arguments, handed to Spring Boot unchanged
     */
    public static void main(String[] args) {
        final Class<?> primarySource = SpeechApplication.class;
        SpringApplication.run(primarySource, args);
    }
}
\ No newline at end of file
amos-boot-utils/amos-boot-utils-speech/src/main/java/com/yeejoin/amos/speech/util/HttpUtil.java
0 → 100644
View file @
54766f8a
package
com
.
yeejoin
.
amos
.
speech
.
util
;
import
okhttp3.*
;
import
java.io.File
;
import
java.io.IOException
;
import
java.net.SocketTimeoutException
;
import
java.util.HashMap
;
import
java.util.Map
;
import
java.util.concurrent.TimeUnit
;
/**
 * Small OkHttp-based helper used by the speech demos to upload a file with an HTTP POST.
 */
public class HttpUtil {

    // Shared client: 20 s connect timeout, 60 s read timeout, 60 s write timeout.
    static OkHttpClient client = new OkHttpClient.Builder()
            .connectTimeout(20, TimeUnit.SECONDS)
            .readTimeout(60, TimeUnit.SECONDS)
            .writeTimeout(60, TimeUnit.SECONDS)
            .build();

    /**
     * POSTs the content of {@code speechFile} as an {@code application/octet-stream} body.
     *
     * @param url        target URL
     * @param headers    request headers to add; may be {@code null} or empty
     * @param speechFile file whose content is sent as the request body
     * @return the response body as a string, or {@code null} when {@code speechFile} is
     *         {@code null} or not a regular file, when the call fails or times out, or when
     *         the response carries no body
     */
    public static String sendPostFile(String url, HashMap<String, String> headers, File speechFile) {
        if (speechFile == null || !speechFile.isFile()) {
            String path = speechFile == null ? "null" : speechFile.getPath();
            System.err.println("The filePath is not a file: " + path);
            return null;
        }
        RequestBody body = RequestBody.create(MediaType.parse("application/octet-stream"), speechFile);

        Headers.Builder hb = new Headers.Builder();
        if (headers != null && !headers.isEmpty()) {
            for (Map.Entry<String, String> entry : headers.entrySet()) {
                hb.add(entry.getKey(), entry.getValue());
            }
        }

        Request request = new Request.Builder()
                .url(url)
                .headers(hb.build())
                .post(body)
                .build();
        return getResponseWithTimeout(request);
    }

    /**
     * Executes the request synchronously and returns the response body text.
     *
     * @param request the request to execute
     * @return the body text, or {@code null} on timeout, I/O failure or missing body
     */
    private static String getResponseWithTimeout(Request request) {
        // try-with-resources closes the response even when reading the body throws,
        // so the underlying connection is not leaked.
        try (Response response = client.newCall(request).execute()) {
            ResponseBody responseBody = response.body();
            if (responseBody == null) {
                System.err.println("get result error: response has no body");
                return null;
            }
            return responseBody.string();
        } catch (SocketTimeoutException e) {
            System.err.println("get result timeout");
        } catch (IOException e) {
            System.err.println("get result error " + e.getMessage());
        }
        return null;
    }
}
\ No newline at end of file
amos-boot-utils/amos-boot-utils-speech/src/main/java/com/yeejoin/amos/speech/util/SpeechFlashRecognizerDemo.java
0 → 100644
View file @
54766f8a
package
com
.
yeejoin
.
amos
.
speech
.
util
;
import
java.io.File
;
import
java.io.IOException
;
import
java.util.HashMap
;
/**
 * Flash (express) recording-file recognition: a local audio file is uploaded and the
 * service answers in the same HTTP response. File size and duration are limited.
 * <p>
 * Audio formats: AAC / MP3 / OPUS / WAV encoded audio.
 * Limits: files up to 100 MB and no longer than 2 hours; for recordings longer than
 * 2 hours use the standard recording-file recognition instead.
 * <p>
 * Model type: 8000 (telephony) / 16000 (non-telephony).
 *
 * @see SpeechRecognizerDemo
 */
public class SpeechFlashRecognizerDemo {

    /** Sample file used by {@link #main(String[])} when no path is passed on the command line. */
    private static final String DEFAULT_FILE = "D:\\ffmpeg-4.4-full_build-shared\\bin\\test.mp3";

    /** Flash recognizer REST endpoint (HTTPS). */
    private static final String ENDPOINT =
            "https://nls-gateway.cn-shanghai.aliyuncs.com/stream/v1/FlashRecognizer";

    private final File speechFile;

    /**
     * Demo entry point. The flash edition is not available on trial accounts.
     *
     * @param args optional: {@code args[0]} is the path of the audio file to recognize;
     *             when omitted the built-in sample path is used
     */
    public static void main(String[] args) {
        String fileName = args != null && args.length > 0 ? args[0] : DEFAULT_FILE;
        String format = "mp3";
        int sampleRate = 16000;
        SpeechFlashRecognizerDemo demo = new SpeechFlashRecognizerDemo(new File(fileName));
        try {
            demo.process(SpeechUtil.getToken(), format, sampleRate);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * @param speechFile the audio file to upload
     */
    public SpeechFlashRecognizerDemo(File speechFile) {
        this.speechFile = speechFile;
    }

    /**
     * Sends the audio file to the flash recognizer and prints the response.
     * <ol>
     * <li>Protocol: HTTPS REST POST</li>
     * <li>Host: nls-gateway.cn-shanghai.aliyuncs.com</li>
     * <li>Path: /stream/v1/FlashRecognizer</li>
     * <li>Required query parameters: appkey, token, format, sample_rate</li>
     * </ol>
     *
     * @param token      access token for the speech service
     * @param format     audio format name, e.g. {@code "mp3"}
     * @param sampleRate audio sample rate, e.g. {@code 16000}
     */
    public void process(String token, String format, int sampleRate) {
        String prefix = ENDPOINT + "?appkey=" + SpeechUtil.AppKey + "&token=";
        String suffix = "&format=" + format + "&sample_rate=" + sampleRate;
        String request = prefix + token + suffix;
        // The token is a bearer credential: log the URL with the token masked.
        System.out.println("Request: " + prefix + "***" + suffix);

        // Header: Content-Type: application/octet-stream
        HashMap<String, String> headers = new HashMap<>();
        headers.put("Content-Type", "application/octet-stream");

        // Send the HTTPS POST and measure how long the service takes to answer.
        long start = System.currentTimeMillis();
        String response = HttpUtil.sendPostFile(request, headers, this.speechFile);
        System.out.println("latency = " + (System.currentTimeMillis() - start) + " ms");

        if (response != null) {
            System.out.println("Response: " + response);
        } else {
            System.err.println("识别失败!");
        }
    }
}
\ No newline at end of file
amos-boot-utils/amos-boot-utils-speech/src/main/java/com/yeejoin/amos/speech/util/SpeechRecognizerDemo.java
0 → 100644
View file @
54766f8a
package
com
.
yeejoin
.
amos
.
speech
.
util
;
import
com.alibaba.fastjson.JSONObject
;
import
com.aliyuncs.CommonRequest
;
import
com.aliyuncs.CommonResponse
;
import
com.aliyuncs.DefaultAcsClient
;
import
com.aliyuncs.IAcsClient
;
import
com.aliyuncs.exceptions.ClientException
;
import
com.aliyuncs.http.MethodType
;
import
com.aliyuncs.profile.DefaultProfile
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
java.util.concurrent.TimeUnit
;
/**
 * Standard recording-file recognition (the audio file is referenced by a publicly reachable URL).
 * <p>
 * The file must be submitted as an HTTP-accessible URL; local files cannot be uploaded.
 * The URL must be publicly readable, must use a domain name (not an IP address) and must
 * not contain spaces.
 * (For intranet use consider the flash edition, which accepts file uploads but has no trial.)
 * <p>
 * Supports mono/stereo .wav, .mp3, .m4a, .wma, .aac, .ogg, .amr and .flac recordings.
 * File size must stay below 512 MB.
 * Free users can recognize at most 2 hours of audio per day.
 * After submission, free-tier tasks complete within 24 hours and paid tasks within 6 hours;
 * results are kept on the server for 72 hours.
 *
 * @see SpeechFlashRecognizerDemo
 */
public class SpeechRecognizerDemo {

    // Region id: constant, fixed value.
    public static final String REGIONID = "cn-shanghai";
    public static final String ENDPOINTNAME = "cn-shanghai";
    public static final String PRODUCT = "nls-filetrans";
    public static final String DOMAIN = "filetrans.cn-shanghai.aliyuncs.com";
    public static final String API_VERSION = "2018-08-17";
    public static final String POST_REQUEST_ACTION = "SubmitTask";
    public static final String GET_REQUEST_ACTION = "GetTaskResult";

    // Request parameter names.
    public static final String KEY_APP_KEY = "appkey";
    public static final String KEY_FILE_LINK = "file_link";
    public static final String KEY_VERSION = "version";
    public static final String KEY_ENABLE_WORDS = "enable_words";

    // Response field names.
    public static final String KEY_TASK = "Task";
    public static final String KEY_TASK_ID = "TaskId";
    public static final String KEY_STATUS_TEXT = "StatusText";
    public static final String KEY_RESULT = "Result";

    // Status values.
    public static final String STATUS_SUCCESS = "SUCCESS";
    private static final String STATUS_RUNNING = "RUNNING";
    private static final String STATUS_QUEUEING = "QUEUEING";

    // Alibaba Cloud authenticated client.
    IAcsClient client;

    private static final Logger logger = LoggerFactory.getLogger(SpeechRecognizerDemo.class);

    /**
     * Demo entry point: submits a recording and polls for its transcription.
     *
     * @param args optional: {@code args[0]} is the public URL of the recording;
     *             when omitted a built-in sample URL is used
     */
    public static void main(String[] args) throws Exception {
        String fileLink = args != null && args.length > 0
                ? args[0]
                : "https://gw.alipayobjects.com/os/bmw-prod/0574ee2e-f494-45a5-820f-63aee583045a.wav";
        SpeechRecognizerDemo demo = new SpeechRecognizerDemo();

        // Step 1: submit the recognition request and obtain the task id used for polling.
        String taskId = demo.submitFileTransRequest(fileLink);
        if (taskId == null) {
            logger.error("录音文件识别请求失败!");
            return;
        }
        logger.info("录音文件识别请求成功,task_id: " + taskId);

        // Step 2: poll for the recognition result by task id.
        String result = demo.getFileTransResult(taskId);
        if (result != null) {
            logger.info("录音文件识别结果查询成功:" + result);
        } else {
            logger.error("录音文件识别结果查询失败!");
        }
    }

    /**
     * Registers the file-transcription endpoint and creates the authenticated client
     * from the credentials exposed by {@code SpeechUtil}.
     */
    public SpeechRecognizerDemo() {
        // Register the endpoint.
        try {
            DefaultProfile.addEndpoint(ENDPOINTNAME, REGIONID, PRODUCT, DOMAIN);
        } catch (ClientException e) {
            logger.error("Failed to register endpoint " + DOMAIN, e);
        }
        // Create and initialise the DefaultAcsClient instance.
        DefaultProfile profile =
                DefaultProfile.getProfile(REGIONID, SpeechUtil.AccessKeId, SpeechUtil.AccessKeySecret);
        this.client = new DefaultAcsClient(profile);
    }

    /**
     * Submits a recording for recognition.
     *
     * @param fileLink public URL of the recording
     * @return the task id to poll with, or {@code null} when the request fails or the
     *         service does not answer with HTTP 200 and status text SUCCESS
     */
    public String submitFileTransRequest(String fileLink) {
        // 1. Create the CommonRequest and set the request parameters.
        CommonRequest postRequest = new CommonRequest();
        postRequest.setDomain(DOMAIN);
        // API version, formatted as YYYY-MM-DD.
        postRequest.setVersion(API_VERSION);
        postRequest.setAction(POST_REQUEST_ACTION);
        postRequest.setProduct(PRODUCT);

        // 2. Build the task description; it is sent as a JSON string in the request body.
        JSONObject taskObject = new JSONObject();
        taskObject.put(KEY_APP_KEY, SpeechUtil.AppKey);
        taskObject.put(KEY_FILE_LINK, fileLink);
        // New integrations should use version 4.0; existing ones (default 2.0) that want
        // to keep the old behaviour should drop this parameter.
        taskObject.put(KEY_VERSION, "4.0");
        // Word-level output; defaults to false and requires version 4.0 or later.
        taskObject.put(KEY_ENABLE_WORDS, true);
        String task = taskObject.toJSONString();
        logger.info(task);
        postRequest.putBodyParameter(KEY_TASK, task);
        postRequest.setMethod(MethodType.POST);

        // 3. Submit the request and extract the task id for later polling.
        String taskId = null;
        try {
            CommonResponse postResponse = client.getCommonResponse(postRequest);
            logger.warn("提交录音文件识别请求的响应:" + postResponse.getData());
            if (postResponse.getHttpStatus() == 200) {
                JSONObject result = JSONObject.parseObject(postResponse.getData());
                String statusText = result.getString(KEY_STATUS_TEXT);
                if (STATUS_SUCCESS.equals(statusText)) {
                    taskId = result.getString(KEY_TASK_ID);
                }
            }
        } catch (ClientException e) {
            logger.error("Failed to submit file transcription task", e);
        }
        return taskId;
    }

    /**
     * Polls the recognition result of a task until it leaves the RUNNING/QUEUEING state.
     *
     * @param taskId id returned by {@link #submitFileTransRequest(String)}
     * @return the result JSON text on success (empty string when the task succeeded but
     *         produced no result, e.g. silence or noise only), or {@code null} when the
     *         task failed, the service answered with a non-200 status, the query threw,
     *         or the polling thread was interrupted
     */
    public String getFileTransResult(String taskId) {
        // 1. Create the CommonRequest and set the task id.
        CommonRequest getRequest = new CommonRequest();
        getRequest.setDomain(DOMAIN);
        getRequest.setVersion(API_VERSION);
        getRequest.setAction(GET_REQUEST_ACTION);
        getRequest.setProduct(PRODUCT);
        getRequest.putQueryParameter(KEY_TASK_ID, taskId);
        getRequest.setMethod(MethodType.GET);

        // 2. Poll until the status text is SUCCESS or an error description.
        String result = null;
        while (true) {
            try {
                CommonResponse getResponse = client.getCommonResponse(getRequest);
                logger.warn("识别查询结果:" + getResponse.getData());
                if (getResponse.getHttpStatus() != 200) {
                    break;
                }
                JSONObject rootObj = JSONObject.parseObject(getResponse.getData());
                String statusText = rootObj.getString(KEY_STATUS_TEXT);
                if (STATUS_RUNNING.equals(statusText) || STATUS_QUEUEING.equals(statusText)) {
                    // Still in progress: wait before the next poll.
                    TimeUnit.SECONDS.sleep(3);
                    continue;
                }
                // Success returns the result; any other terminal status returns null.
                if (STATUS_SUCCESS.equals(statusText)) {
                    result = rootObj.getString(KEY_RESULT);
                    // Success without a result usually means the file held only silence or noise.
                    if (result == null) {
                        result = "";
                    }
                }
                break;
            } catch (InterruptedException e) {
                // Restore the interrupt flag and stop polling.
                Thread.currentThread().interrupt();
                logger.error("Interrupted while polling task " + taskId, e);
                break;
            } catch (Exception e) {
                // Stop instead of retrying immediately: retrying here without a delay
                // would spin forever on a persistent failure.
                logger.error("Failed to query result of task " + taskId, e);
                break;
            }
        }
        return result;
    }
}
amos-boot-utils/amos-boot-utils-speech/src/main/java/com/yeejoin/amos/speech/util/SpeechTranscriberDemo.java
0 → 100644
View file @
54766f8a
package
com
.
yeejoin
.
amos
.
speech
.
util
;
import
com.alibaba.nls.client.protocol.InputFormatEnum
;
import
com.alibaba.nls.client.protocol.NlsClient
;
import
com.alibaba.nls.client.protocol.SampleRateEnum
;
import
com.alibaba.nls.client.protocol.asr.SpeechTranscriber
;
import
com.alibaba.nls.client.protocol.asr.SpeechTranscriberListener
;
import
com.alibaba.nls.client.protocol.asr.SpeechTranscriberResponse
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
java.io.File
;
import
java.io.FileInputStream
;
import
java.io.IOException
;
import
java.util.concurrent.TimeUnit
;
import
static
com
.
yeejoin
.
amos
.
speech
.
util
.
SpeechUtil
.
AppKey
;
/**
 * Real-time speech transcription demo.
 * <p>
 * Supported input: PCM (uncompressed PCM or WAV files), 16-bit samples, mono.
 * Supported sample rates: 8000 Hz and 16000 Hz.
 * <p>
 * An instance is single-use: {@link #process()} shuts the underlying client down when it finishes.
 */
public class SpeechTranscriberDemo {

    private final NlsClient client;
    private final File speechFile;
    private static final Logger logger = LoggerFactory.getLogger(SpeechTranscriberDemo.class);

    /**
     * Demo entry point. A local file is used to simulate a real-time audio stream; in real
     * use the audio would be captured or received live and forwarded to the ASR service.
     *
     * @param args optional: {@code args[0]} is the path of the PCM/WAV file to send;
     *             when omitted a built-in sample path is used
     */
    public static void main(String[] args) {
        String filepath = args != null && args.length > 0
                ? args[0]
                : "D:\\ffmpeg-4.4-full_build-shared\\bin\\test1.wav";
        try {
            SpeechTranscriberDemo transcriberDemo = new SpeechTranscriberDemo(new File(filepath));
            transcriberDemo.process();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Creates the demo and its {@code NlsClient}, which uses the SDK's default service address.
     *
     * @param speechFile existing regular file containing the audio to send
     * @throws IOException              if the access token cannot be obtained
     * @throws IllegalArgumentException if {@code speechFile} is null, missing or not a regular file
     */
    public SpeechTranscriberDemo(File speechFile) throws IOException {
        if (speechFile == null || !speechFile.exists() || !speechFile.isFile()) {
            throw new IllegalArgumentException("识别文件异常");
        }
        this.speechFile = speechFile;
        this.client = new NlsClient(SpeechUtil.getToken());
    }

    /**
     * Converts a chunk size in bytes into the playback duration of that chunk in
     * milliseconds, assuming 16-bit mono audio.
     *
     * @param dataSize   number of audio bytes
     * @param sampleRate sample rate in Hz (8000 or 16000 are supported by the service)
     * @return duration in milliseconds, truncated
     */
    public static int getSleepDelta(int dataSize, int sampleRate) {
        // Same formula as before, evaluated in long so large buffers cannot overflow int.
        return (int) ((long) dataSize * 10 * 8000 / (160L * sampleRate));
    }

    /**
     * Streams the file to the transcription service at roughly real-time speed and logs
     * every callback. Always closes the transcriber and shuts the client down afterwards.
     */
    public void process() {
        SpeechTranscriber transcriber = null;
        try {
            // Create the instance and open the connection.
            transcriber = new SpeechTranscriber(client, createListener());
            transcriber.setAppKey(AppKey);
            // Input audio encoding.
            transcriber.setFormat(InputFormatEnum.PCM);
            // Input audio sample rate.
            transcriber.setSampleRate(SampleRateEnum.SAMPLE_RATE_16K);
            // Whether intermediate results are returned.
            transcriber.setEnableIntermediateResult(true);
            // Whether punctuation is generated.
            transcriber.setEnablePunctuation(true);
            // Whether results are normalised (inverse text normalisation), e.g. spelled-out numbers to digits.
            transcriber.setEnableITN(false);
            // Optional custom parameters, set through addCustomedParam(name, value):
            //   "max_sentence_silence"               VAD sentence-break silence; default 800 ms, valid 200-2000 ms
            //   "enable_semantic_sentence_detection" semantic sentence breaking
            //   "disfluency"                         disfluency smoothing
            //   "enable_words"                       word mode
            //   "speech_noise_threshold"             VAD noise threshold in [-1, +1]; closer to -1 accepts more
            //                                        audio as speech, closer to +1 rejects more as noise.
            //                                        Advanced parameter: tune carefully and test thoroughly.
            //   "customization_id"                   id of a trained custom language model
            //   "vocabulary_id"                      id of a trained custom hot-word list
            //   "enable_vad_unify_post"              post-processing after VAD sentence breaking
            // Whether the per-sentence timeout is ignored.
            transcriber.addCustomedParam("enable_ignore_sentence_timeout", false);
            // Serialises the settings above to JSON, sends them and waits for the server's confirmation.
            transcriber.start();

            // try-with-resources guarantees the file handle is released even on failure.
            try (FileInputStream fis = new FileInputStream(speechFile)) {
                byte[] b = new byte[3200];
                int len;
                while ((len = fis.read(b)) > 0) {
                    logger.info("send data pack length: " + len);
                    transcriber.send(b, len);
                    // Reading a local file is much faster than real time, so sleep for the
                    // duration of the chunk just sent. Not needed for live audio; for 8 kHz
                    // audio pass 8000 as the second argument.
                    int deltaSleep = getSleepDelta(len, 16000);
                    TimeUnit.MILLISECONDS.sleep(deltaSleep);
                }
            }

            // Tell the server all audio was sent and wait for it to finish.
            long now = System.currentTimeMillis();
            logger.info("ASR wait for complete");
            transcriber.stop();
            logger.info("ASR latency : " + (System.currentTimeMillis() - now) + " ms");
        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers can observe the interruption.
            Thread.currentThread().interrupt();
            logger.error(e.getMessage(), e);
        } catch (Exception e) {
            logger.error(e.getMessage(), e);
        } finally {
            if (null != transcriber) {
                transcriber.close();
            }
            if (client != null) {
                client.shutdown();
            }
        }
    }

    /** Builds the listener that logs every transcription callback. */
    private static SpeechTranscriberListener createListener() {
        return new SpeechTranscriberListener() {
            /**
             * Intermediate result during recognition; only delivered when
             * setEnableIntermediateResult is true.
             */
            @Override
            public void onTranscriptionResultChange(SpeechTranscriberResponse response) {
                logger.warn("语音识别过程中返回的结果");
                logger.warn("task_id: " + response.getTaskId()
                        + ", name: " + response.getName()
                        // Status code "20000000" means normal recognition.
                        + ", status: " + response.getStatus()
                        // Sentence number, increasing from 1.
                        + ", index: " + response.getTransSentenceIndex()
                        // Current recognition result.
                        + ", result: " + response.getTransSentenceText()
                        // Audio processed so far, in milliseconds.
                        + ", time: " + response.getTransSentenceTime());
            }

            /** The server is ready to start recognising. */
            @Override
            public void onTranscriberStart(SpeechTranscriberResponse response) {
                logger.warn("服务端准备好了进行识别");
                logger.warn("task_id: " + response.getTaskId()
                        + ", name: " + response.getName()
                        + ", status: " + response.getStatus());
            }

            /** The server detected the beginning of a sentence. */
            @Override
            public void onSentenceBegin(SpeechTranscriberResponse response) {
                logger.warn("服务端检测到了一句话的开始");
                logger.warn("task_id: " + response.getTaskId()
                        + ", name: " + response.getName()
                        + ", status: " + response.getStatus());
            }

            /**
             * The server detected the end of a sentence. The server segments sentences
             * itself and sends this message whenever one is complete.
             */
            @Override
            public void onSentenceEnd(SpeechTranscriberResponse response) {
                logger.warn("服务端检测到了一句话的结束");
                logger.warn("task_id: " + response.getTaskId()
                        + ", name: " + response.getName()
                        // Status code "20000000" means normal recognition.
                        + ", status: " + response.getStatus()
                        // Sentence number, increasing from 1.
                        + ", index: " + response.getTransSentenceIndex()
                        // Current recognition result.
                        + ", result: " + response.getTransSentenceText()
                        // Confidence.
                        + ", confidence: " + response.getConfidence()
                        // Sentence begin time.
                        + ", begin_time: " + response.getSentenceBeginTime()
                        // Audio processed so far, in milliseconds.
                        + ", time: " + response.getTransSentenceTime());
            }

            /** Final result delivered when recognition completes. */
            @Override
            public void onTranscriptionComplete(SpeechTranscriberResponse response) {
                logger.warn("识别结束后返回的最终结果");
                logger.warn("task_id: " + response.getTaskId()
                        + ", name: " + response.getName()
                        + ", status: " + response.getStatus()
                        + ",result:" + response.getTransSentenceText());
            }

            /** Failure callback. */
            @Override
            public void onFail(SpeechTranscriberResponse response) {
                logger.error("失败处理");
                logger.error("task_id: " + response.getTaskId()
                        + ", status: " + response.getStatus()
                        + ", status_text: " + response.getStatusText());
            }
        };
    }
}
amos-boot-utils/amos-boot-utils-speech/src/main/java/com/yeejoin/amos/speech/util/SpeechUtil.java
0 → 100644
View file @
54766f8a
package
com
.
yeejoin
.
amos
.
speech
.
util
;
import
com.alibaba.nls.client.AccessToken
;
import
java.io.IOException
;
/**
 * Alibaba Cloud speech recognition: credentials and access-token helper.
 * <p>
 * Trial edition
 * <p>
 * Since 2020-03-01 the free trial rules are:
 * the trial is not billed (watch the official announcements for changes);
 * short-sentence recognition, real-time recognition and speech synthesis allow 2 concurrent
 * calls per calendar day with unlimited daily usage, for a three-month trial period;
 * recording-file recognition is limited to 2 hours of audio per calendar day, for a
 * three-month trial period;
 * long-text speech synthesis and flash recording-file recognition are not available on the
 * trial edition and require the commercial edition.
 * <p>
 * Commercial edition
 * <p>
 * From 2019-06-10 00:00 billing automatically follows the rules above.
 * For recording-file recognition beyond 2 hours, activate the commercial edition.
 * The commercial edition is pay-as-you-go by default. After a prepaid resource pack is
 * bought, billing switches to prepaid and consumes the pack first; once the pack is used
 * up it switches back to pay-as-you-go.
 * The commercial edition is billed by daily usage with no free quota; no usage means no
 * charge. Usage is settled at 24:00 Beijing time each day and the bill appears with a
 * slight delay.
 * Downgrading from commercial back to trial is not recommended: the concurrency quota
 * drops to 0 and the service becomes unusable.
 */
public class SpeechUtil {

    /*
     * Credentials are secrets and must not be committed to source control. Each value is
     * read from the environment variable of the given name, falling back to a JVM system
     * property of the same name; it is an empty string when neither is set.
     */
    public static final String AccessKeId = readSetting("ALIBABA_CLOUD_ACCESS_KEY_ID");
    public static final String AccessKeySecret = readSetting("ALIBABA_CLOUD_ACCESS_KEY_SECRET");
    public static final String AppKey = readSetting("NLS_APP_KEY");

    /**
     * Requests an access token for the speech service.
     *
     * @return token
     * @throws IOException           if the token request fails
     * @throws IllegalStateException if the access key id or secret is not configured
     */
    public static String getToken() throws IOException {
        if (AccessKeId.isEmpty() || AccessKeySecret.isEmpty()) {
            throw new IllegalStateException(
                    "Alibaba Cloud credentials are not configured: set ALIBABA_CLOUD_ACCESS_KEY_ID and "
                            + "ALIBABA_CLOUD_ACCESS_KEY_SECRET as environment variables or system properties");
        }
        AccessToken accessToken = new AccessToken(AccessKeId, AccessKeySecret);
        accessToken.apply();
        // The token itself is a bearer credential and is deliberately not printed.
        System.out.println("get token succeeded, expire time: " + accessToken.getExpireTime());
        return accessToken.getToken();
    }

    /** Reads a setting from the environment, then from system properties; never returns null. */
    private static String readSetting(String name) {
        String value = System.getenv(name);
        if (value == null || value.trim().isEmpty()) {
            value = System.getProperty(name);
        }
        return value == null ? "" : value.trim();
    }
}
amos-boot-utils/amos-boot-utils-speech/src/main/resources/application.properties
0 → 100644
View file @
54766f8a
amos-boot-utils/pom.xml
View file @
54766f8a
<?xml version="1.0" encoding="UTF-8"?>
<?xml version="1.0" encoding="UTF-8"?>
<project
xmlns=
"http://maven.apache.org/POM/4.0.0"
xmlns:xsi=
"http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation=
"http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"
>
<project
xmlns=
"http://maven.apache.org/POM/4.0.0"
xmlns:xsi=
"http://www.w3.org/2001/XMLSchema-instance"
<modelVersion>
4.0.0
</modelVersion>
xsi:schemaLocation=
"http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"
>
<parent>
<modelVersion>
4.0.0
</modelVersion>
<artifactId>
amos-biz-boot
</artifactId>
<parent>
<groupId>
com.amosframework.boot
</groupId>
<artifactId>
amos-biz-boot
</artifactId>
<version>
1.0.0
</version>
<groupId>
com.amosframework.boot
</groupId>
</parent>
<version>
1.0.0
</version>
</parent>
<artifactId>
amos-boot-utils
</artifactId>
<artifactId>
amos-boot-utils
</artifactId>
<packaging>
pom
</packaging>
<packaging>
pom
</packaging>
<dependencies>
<dependencies>
</dependencies>
</dependencies>
<modules>
<modules>
<module>
amos-boot-utils-jpush
</module>
<module>
amos-boot-utils-jpush
</module>
<module>
amos-boot-utils-video
</module>
<module>
amos-boot-utils-video
</module>
</modules>
<module>
amos-boot-utils-speech
</module>
</modules>
</project>
</project>
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment