Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
A
amos-boot-biz
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
项目统一框架
amos-boot-biz
Commits
8c96d81b
Commit
8c96d81b
authored
Nov 30, 2021
by
helinlin
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
添加阿里云音频识别demo
parent
da3aa95c
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
186 additions
and
157 deletions
+186
-157
pom.xml
amos-boot-utils/amos-boot-utils-speech/pom.xml
+5
-0
SpeechFlashRecognizer.java
...n/java/com/yeejoin/amos/speech/SpeechFlashRecognizer.java
+9
-35
SpeechRecognizer.java
...c/main/java/com/yeejoin/amos/speech/SpeechRecognizer.java
+2
-20
SpeechTranscriber.java
.../main/java/com/yeejoin/amos/speech/SpeechTranscriber.java
+1
-102
SpeechApplicationTests.java
...src/test/java/com/yejoin/amos/SpeechApplicationTests.java
+169
-0
No files found.
amos-boot-utils/amos-boot-utils-speech/pom.xml
View file @
8c96d81b
...
...
@@ -34,6 +34,11 @@
<artifactId>
nls-sdk-transcriber
</artifactId>
<version>
2.2.1
</version>
</dependency>
<dependency>
<groupId>
org.springframework.boot
</groupId>
<artifactId>
spring-boot-starter-test
</artifactId>
<scope>
test
</scope>
</dependency>
</dependencies>
</project>
amos-boot-utils/amos-boot-utils-speech/src/main/java/com/yeejoin/amos/speech/SpeechFlashRecognizer.java
View file @
8c96d81b
package
com
.
yeejoin
.
amos
.
speech
;
import
okhttp3.Call
;
import
okhttp3.Callback
;
import
okhttp3.Response
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
java.io.File
;
import
java.io.IOException
;
import
java.util.HashMap
;
/**
* 不支持免费用户测试
* 录音文件识别极速版(支持提交音频文件识别,文件大小,文件时长有限制)
* <p>
* 音频格式:支持AAC/MP3/OPUS/WAV格式编码的音频。
...
...
@@ -35,45 +33,21 @@ public class SpeechFlashRecognizer {
//识别回调函数
private
final
Callback
callback
;
/**
* 使用示例
* 不支持试用,需要付费
* 不支持试用,需要付费
* 不支持试用,需要付费
*/
public
static
void
main
(
String
[]
args
)
{
//String fileName = SpeechRecognizerRestfulDemo.class.getClassLoader().getResource("./nls-sample-16k.wav").getPath();
// 重要:此处用一个本地文件来模拟发送实时流数据,实际使用时,您可以从某处实时采集或接收语音流并发送到ASR服务端。
String
fileName
=
"D:\\ffmpeg-4.4-full_build-shared\\bin\\test.mp3"
;
String
format
=
"mp3"
;
int
sampleRate
=
16000
;
SpeechFlashRecognizer
speechFlashRecognizer
=
new
SpeechFlashRecognizer
(
new
File
(
fileName
),
format
,
sampleRate
,
new
Callback
()
{
@Override
public
void
onFailure
(
Call
call
,
IOException
e
)
{
logger
.
error
(
"语音识别失败,原因:"
+
e
.
getMessage
());
}
@Override
public
void
onResponse
(
Call
call
,
Response
response
)
throws
IOException
{
assert
response
.
body
()
!=
null
;
logger
.
warn
(
"语音识别结果:"
+
response
.
body
().
string
());
}
});
speechFlashRecognizer
.
process
();
}
public
SpeechFlashRecognizer
(
File
speechFile
,
String
format
,
int
sampleRate
,
Callback
callback
)
{
if
(
speechFile
!=
null
&&
speechFile
.
exists
()
&&
speechFile
.
isFile
())
{
public
SpeechFlashRecognizer
(
File
speechFile
,
int
sampleRate
,
Callback
callback
)
{
if
(
speechFile
==
null
||
!
speechFile
.
exists
()
||
!
speechFile
.
isFile
())
{
throw
new
IllegalArgumentException
(
"待识别的文件存在异常"
);
}
if
(
null
==
format
||
""
.
equals
(
format
))
{
throw
new
IllegalArgumentException
(
"音频文件格式不能为空"
);
}
if
(
sampleRate
==
0
)
{
throw
new
IllegalArgumentException
(
"音频采样率不能为0"
);
}
String
[]
split
=
speechFile
.
getName
().
split
(
"\\."
);
if
(
split
.
length
!=
0
)
{
this
.
format
=
split
[
split
.
length
-
1
];
}
else
{
throw
new
IllegalArgumentException
(
"音频文件格式提取失败"
);
}
this
.
speechFile
=
speechFile
;
this
.
format
=
format
;
this
.
sampleRate
=
sampleRate
;
this
.
callback
=
callback
;
}
...
...
amos-boot-utils/amos-boot-utils-speech/src/main/java/com/yeejoin/amos/speech/SpeechRecognizer.java
View file @
8c96d81b
...
...
@@ -14,6 +14,7 @@ import org.slf4j.LoggerFactory;
import
java.util.concurrent.TimeUnit
;
/**
* 不支持提交文件
* 录音文件识别普通版(提供外网访问的音频文件地址)
* <p>
* 识别的文件需要提交基于HTTP可访问的URL地址,可以通过URL访问,不支持提交本地文件
...
...
@@ -59,25 +60,6 @@ public class SpeechRecognizer {
IAcsClient
client
;
private
static
final
Logger
logger
=
LoggerFactory
.
getLogger
(
SpeechRecognizer
.
class
);
public
static
void
main
(
String
[]
args
)
throws
Exception
{
String
fileLink
=
"https://gw.alipayobjects.com/os/bmw-prod/0574ee2e-f494-45a5-820f-63aee583045a.wav"
;
SpeechRecognizer
speechRecognizer
=
new
SpeechRecognizer
();
// 第一步:提交录音文件识别请求,获取任务ID用于后续的识别结果轮询。
String
taskId
=
speechRecognizer
.
submitFileTransRequest
(
fileLink
);
if
(
taskId
!=
null
)
{
logger
.
info
(
"录音文件识别请求成功,task_id: "
+
taskId
);
}
else
{
logger
.
error
(
"录音文件识别请求失败!"
);
return
;
}
// 第二步:根据任务ID轮询识别结果。
String
result
=
speechRecognizer
.
getFileTransResult
(
taskId
);
if
(
result
!=
null
)
{
logger
.
info
(
"录音文件识别结果查询成功:"
+
result
);
}
else
{
logger
.
error
(
"录音文件识别结果查询失败!"
);
}
}
public
SpeechRecognizer
()
{
// 设置endpoint
...
...
@@ -92,7 +74,7 @@ public class SpeechRecognizer {
}
/**
* 提交录音文件
* 提交录音文件
地址
*/
public
String
submitFileTransRequest
(
String
fileLink
)
{
/**
...
...
amos-boot-utils/amos-boot-utils-speech/src/main/java/com/yeejoin/amos/speech/SpeechTranscriber.java
View file @
8c96d81b
...
...
@@ -26,109 +26,8 @@ public class SpeechTranscriber {
//识别回调函数
private
final
SpeechTranscriberListener
listener
;
/**
* 使用示例
* 提供音频留地址
*/
public
static
void
main
(
String
[]
args
)
{
//本案例使用本地文件模拟发送实时流数据。您在实际使用时,可以实时采集或接收语音流并发送到ASR服务端。
String
fileLink
=
"https://gw.alipayobjects.com/os/bmw-prod/0574ee2e-f494-45a5-820f-63aee583045a.wav"
;
//将上面fileLink文件下载到本地后,替换filepath为本地地址测试
String
filepath
=
"D:\\ffmpeg-4.4-full_build-shared\\bin\\test1.wav"
;
SpeechTranscriber
transcriber
=
new
SpeechTranscriber
(
new
File
(
filepath
),
new
SpeechTranscriberListener
()
{
/**
* 语音识别过程中返回的结果。仅当setEnableIntermediateResult为true时,才会返回该消息。
*/
@Override
public
void
onTranscriptionResultChange
(
SpeechTranscriberResponse
response
)
{
logger
.
warn
(
"语音识别过程中返回的结果"
);
logger
.
warn
(
"task_id: "
+
response
.
getTaskId
()
+
", name: "
+
response
.
getName
()
+
//状态码“20000000”表示正常识别。
", status: "
+
response
.
getStatus
()
+
//句子编号,从1开始递增。
", index: "
+
response
.
getTransSentenceIndex
()
+
//当前的识别结果。
", result: "
+
response
.
getTransSentenceText
()
+
//当前已处理的音频时长,单位为毫秒。
", time: "
+
response
.
getTransSentenceTime
());
}
/**
* 服务端准备好了进行识别
*/
@Override
public
void
onTranscriberStart
(
SpeechTranscriberResponse
response
)
{
logger
.
warn
(
"服务端准备好了进行识别"
);
logger
.
warn
(
"task_id: "
+
response
.
getTaskId
()
+
", name: "
+
response
.
getName
()
+
", status: "
+
response
.
getStatus
());
}
/**
* 服务端检测到了一句话的开始
*/
@Override
public
void
onSentenceBegin
(
SpeechTranscriberResponse
response
)
{
logger
.
warn
(
"服务端检测到了一句话的开始"
);
logger
.
warn
(
"task_id: "
+
response
.
getTaskId
()
+
", name: "
+
response
.
getName
()
+
", status: "
+
response
.
getStatus
());
}
/**
* 服务端检测到了一句话的结束
* 识别出一句话。服务端会智能断句,当识别到一句话结束时会返回此消息。
*/
@Override
public
void
onSentenceEnd
(
SpeechTranscriberResponse
response
)
{
logger
.
warn
(
"服务端检测到了一句话的结束"
);
logger
.
warn
(
"task_id: "
+
response
.
getTaskId
()
+
", name: "
+
response
.
getName
()
+
//状态码“20000000”表示正常识别。
", status: "
+
response
.
getStatus
()
+
//句子编号,从1开始递增。
", index: "
+
response
.
getTransSentenceIndex
()
+
//当前的识别结果。
", result: "
+
response
.
getTransSentenceText
()
+
//置信度
", confidence: "
+
response
.
getConfidence
()
+
//开始时间
", begin_time: "
+
response
.
getSentenceBeginTime
()
+
//当前已处理的音频时长,单位为毫秒。
", time: "
+
response
.
getTransSentenceTime
());
}
/**
* 识别结束后返回的最终结果
*/
@Override
public
void
onTranscriptionComplete
(
SpeechTranscriberResponse
response
)
{
logger
.
warn
(
"识别结束后返回的最终结果"
);
logger
.
warn
(
"task_id: "
+
response
.
getTaskId
()
+
", name: "
+
response
.
getName
()
+
", status: "
+
response
.
getStatus
()
+
",result:"
+
response
.
getTransSentenceText
());
}
/**
* 失败处理
*/
@Override
public
void
onFail
(
SpeechTranscriberResponse
response
)
{
logger
.
error
(
"失败处理"
);
logger
.
error
(
"task_id: "
+
response
.
getTaskId
()
+
", status: "
+
response
.
getStatus
()
+
", status_text: "
+
response
.
getStatusText
());
}
});
transcriber
.
process
();
}
public
SpeechTranscriber
(
File
speechFile
,
SpeechTranscriberListener
listener
)
{
if
(
speechFile
!=
null
&&
speechFile
.
exists
()
&&
speechFile
.
isFile
())
{
if
(
speechFile
==
null
||
!
speechFile
.
exists
()
||
!
speechFile
.
isFile
())
{
this
.
speechFile
=
speechFile
;
this
.
listener
=
listener
;
}
else
{
...
...
amos-boot-utils/amos-boot-utils-speech/src/test/java/com/yejoin/amos/SpeechApplicationTests.java
0 → 100644
View file @
8c96d81b
package
com
.
yejoin
.
amos
;
import
com.alibaba.nls.client.protocol.asr.SpeechTranscriberListener
;
import
com.alibaba.nls.client.protocol.asr.SpeechTranscriberResponse
;
import
com.yeejoin.amos.speech.SpeechFlashRecognizer
;
import
com.yeejoin.amos.speech.SpeechRecognizer
;
import
com.yeejoin.amos.speech.SpeechTranscriber
;
import
okhttp3.Call
;
import
okhttp3.Callback
;
import
okhttp3.Response
;
import
org.junit.jupiter.api.Test
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
java.io.File
;
import
java.io.IOException
;
public
class
SpeechApplicationTests
{
private
static
final
Logger
logger
=
LoggerFactory
.
getLogger
(
SpeechApplicationTests
.
class
);
/**
* 实时语音识别使用示例
*/
@Test
void
testSpeechTranscriber
()
{
//本案例使用本地文件模拟发送实时流数据。您在实际使用时,可以实时采集或接收语音流并发送到ASR服务端。
String
fileLink
=
"https://gw.alipayobjects.com/os/bmw-prod/0574ee2e-f494-45a5-820f-63aee583045a.wav"
;
//将上面fileLink文件下载到本地后,替换filepath为本地地址测试
String
filepath
=
"D:\\ffmpeg-4.4-full_build-shared\\bin\\test1.wav"
;
SpeechTranscriber
transcriber
=
new
SpeechTranscriber
(
new
File
(
filepath
),
new
SpeechTranscriberListener
()
{
/**
* 语音识别过程中返回的结果。仅当setEnableIntermediateResult为true时,才会返回该消息。
*/
@Override
public
void
onTranscriptionResultChange
(
SpeechTranscriberResponse
response
)
{
logger
.
warn
(
"语音识别过程中返回的结果"
);
logger
.
warn
(
"task_id: "
+
response
.
getTaskId
()
+
", name: "
+
response
.
getName
()
+
//状态码“20000000”表示正常识别。
", status: "
+
response
.
getStatus
()
+
//句子编号,从1开始递增。
", index: "
+
response
.
getTransSentenceIndex
()
+
//当前的识别结果。
", result: "
+
response
.
getTransSentenceText
()
+
//当前已处理的音频时长,单位为毫秒。
", time: "
+
response
.
getTransSentenceTime
());
}
/**
* 服务端准备好了进行识别
*/
@Override
public
void
onTranscriberStart
(
SpeechTranscriberResponse
response
)
{
logger
.
warn
(
"服务端准备好了进行识别"
);
logger
.
warn
(
"task_id: "
+
response
.
getTaskId
()
+
", name: "
+
response
.
getName
()
+
", status: "
+
response
.
getStatus
());
}
/**
* 服务端检测到了一句话的开始
*/
@Override
public
void
onSentenceBegin
(
SpeechTranscriberResponse
response
)
{
logger
.
warn
(
"服务端检测到了一句话的开始"
);
logger
.
warn
(
"task_id: "
+
response
.
getTaskId
()
+
", name: "
+
response
.
getName
()
+
", status: "
+
response
.
getStatus
());
}
/**
* 服务端检测到了一句话的结束
* 识别出一句话。服务端会智能断句,当识别到一句话结束时会返回此消息。
*/
@Override
public
void
onSentenceEnd
(
SpeechTranscriberResponse
response
)
{
logger
.
warn
(
"服务端检测到了一句话的结束"
);
logger
.
warn
(
"task_id: "
+
response
.
getTaskId
()
+
", name: "
+
response
.
getName
()
+
//状态码“20000000”表示正常识别。
", status: "
+
response
.
getStatus
()
+
//句子编号,从1开始递增。
", index: "
+
response
.
getTransSentenceIndex
()
+
//当前的识别结果。
", result: "
+
response
.
getTransSentenceText
()
+
//置信度
", confidence: "
+
response
.
getConfidence
()
+
//开始时间
", begin_time: "
+
response
.
getSentenceBeginTime
()
+
//当前已处理的音频时长,单位为毫秒。
", time: "
+
response
.
getTransSentenceTime
());
}
/**
* 识别结束后返回的最终结果
*/
@Override
public
void
onTranscriptionComplete
(
SpeechTranscriberResponse
response
)
{
logger
.
warn
(
"识别结束后返回的最终结果"
);
logger
.
warn
(
"task_id: "
+
response
.
getTaskId
()
+
", name: "
+
response
.
getName
()
+
", status: "
+
response
.
getStatus
()
+
",result:"
+
response
.
getTransSentenceText
());
}
/**
* 失败处理
*/
@Override
public
void
onFail
(
SpeechTranscriberResponse
response
)
{
logger
.
error
(
"失败处理"
);
logger
.
error
(
"task_id: "
+
response
.
getTaskId
()
+
", status: "
+
response
.
getStatus
()
+
", status_text: "
+
response
.
getStatusText
());
}
});
transcriber
.
process
();
}
/**
* 语音文件识别极速版使用示例
*/
@Test
void
testSpeechFlashRecognizer
()
{
//String fileName = SpeechRecognizerRestfulDemo.class.getClassLoader().getResource("./nls-sample-16k.wav").getPath();
// 重要:此处用一个本地文件来模拟发送实时流数据,实际使用时,您可以从某处实时采集或接收语音流并发送到ASR服务端。
String
fileName
=
"D:\\ffmpeg-4.4-full_build-shared\\bin\\test.mp3"
;
String
format
=
"mp3"
;
int
sampleRate
=
16000
;
SpeechFlashRecognizer
speechFlashRecognizer
=
new
SpeechFlashRecognizer
(
new
File
(
fileName
),
sampleRate
,
new
Callback
()
{
@Override
public
void
onFailure
(
Call
call
,
IOException
e
)
{
logger
.
error
(
"语音识别失败,原因:"
+
e
.
getMessage
());
}
@Override
public
void
onResponse
(
Call
call
,
Response
response
)
throws
IOException
{
assert
response
.
body
()
!=
null
;
logger
.
warn
(
"语音识别结果:"
+
response
.
body
().
string
());
}
});
speechFlashRecognizer
.
process
();
}
/**
* 语音文件识别普通版使用示例
*/
@Test
void
testSpeechRecognizer
()
{
String
fileLink
=
"https://gw.alipayobjects.com/os/bmw-prod/0574ee2e-f494-45a5-820f-63aee583045a.wav"
;
SpeechRecognizer
speechRecognizer
=
new
SpeechRecognizer
();
// 第一步:提交录音文件识别请求,获取任务ID用于后续的识别结果轮询。
String
taskId
=
speechRecognizer
.
submitFileTransRequest
(
fileLink
);
if
(
taskId
!=
null
)
{
logger
.
info
(
"录音文件识别请求成功,task_id: "
+
taskId
);
}
else
{
logger
.
error
(
"录音文件识别请求失败!"
);
return
;
}
// 第二步:根据任务ID轮询识别结果。
String
result
=
speechRecognizer
.
getFileTransResult
(
taskId
);
if
(
result
!=
null
)
{
logger
.
info
(
"录音文件识别结果查询成功:"
+
result
);
}
else
{
logger
.
error
(
"录音文件识别结果查询失败!"
);
}
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment