Project: amos-boot-biz (项目统一框架 / unified project framework)

Commit 5ebb50c4, authored Dec 01, 2021 by helinlin
Add the Alibaba Cloud audio recognition API (添加阿里云音频识别接口)

Parent: d82751ae
Showing 6 changed files with 472 additions and 242 deletions (+472 / -242)
Changed files:
    AudioToText.java (...oin/amos/boot/module/jcs/biz/audioToText/AudioToText.java)  +62 / -166
    SocketClient.java (...in/amos/boot/module/jcs/biz/audioToText/SocketClient.java)  +54 / -0
    SpeechTranscriberListener.java (...module/jcs/biz/audioToText/SpeechTranscriberListener.java)  +195 / -0
    Audio2TextController.java (.../boot/module/jcs/biz/controller/Audio2TextController.java)  +70 / -0
    AppSpeechTranscriber.java (...in/java/com/yeejoin/amos/speech/AppSpeechTranscriber.java)  +75 / -60
    SpeechApplicationTests.java (...src/test/java/com/yejoin/amos/SpeechApplicationTests.java)  +16 / -16
amos-boot-module/amos-boot-module-biz/amos-boot-module-jcs-biz/src/main/java/com/yeejoin/amos/boot/module/jcs/biz/audioToText/AudioToText.java
package com.yeejoin.amos.boot.module.jcs.biz.audioToText;

import com.alibaba.nls.client.protocol.asr.SpeechTranscriberListener;
import com.alibaba.nls.client.protocol.asr.SpeechTranscriberResponse;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.yeejoin.amos.component.rule.config.RuleConfig;
import com.yeejoin.amos.speech.SpeechTranscriber;
import com.yeejoin.amos.speech.AppSpeechTranscriber;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.scheduling.annotation.Async;
import org.springframework.stereotype.Component;
import org.typroject.tyboot.component.emq.EmqKeeper;

import java.io.File;
import java.io.IOException;
import java.net.ServerSocket;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;

/**
 * Speech to text (语音转文字)
 */
@Component
public class AudioToText {

    private static final Logger logger = LoggerFactory.getLogger(AudioToText.class);

    // Response code for a successful transcription
    public static final int RESULT_SUCCESS_CODE = 20000000;

    /* // Keywords
    public static final ThreadLocal<AudioKeyWord> keyWordThreadLocal = new ThreadLocal<>();
    // Sentences
    public static final ThreadLocal<List<AudioRecord>> recordThreadLocal = new ThreadLocal<>();*/

    public static int serverPort = 10001;

    @Autowired
    private EmqKeeper emqKeeper;

    public void convert() {
        List<AudioRecord> audioRecords = new ArrayList<>();
        AudioKeyWord audioKeyWord = new AudioKeyWord();
        String filepath = "D:\\ffmpeg-4.4-full_build-shared\\bin\\out.pcm";
        SpeechTranscriber transcriber = new SpeechTranscriber(new File(filepath), new SpeechTranscriberListener() {

            /**
             * Intermediate result returned while recognition is in progress.
             * Only sent when setEnableIntermediateResult is true.
             */
            @Override
            public void onTranscriptionResultChange(SpeechTranscriberResponse response) {
                logger.warn("语音识别过程中返回的结果");
                logger.warn("task_id: " + response.getTaskId() +
                        ", name: " + response.getName() +
                        // Status code 20000000 means the sentence was recognized normally.
                        ", status: " + response.getStatus() +
                        // Sentence index, starting from 1.
                        ", index: " + response.getTransSentenceIndex() +
                        // Current recognition result.
                        ", result: " + response.getTransSentenceText() +
                        // Audio duration processed so far, in milliseconds.
                        ", time: " + response.getTransSentenceTime());
                if (response.getStatus() == RESULT_SUCCESS_CODE) {
                    sendToMqtt(response, audioRecords);
                    extractKeyWord(response, audioKeyWord);
                } else {
                    logger.error("异常的相应结果,响应码:" + response.getStatus());
                }
            }

            /**
             * The server is ready to recognize.
             */
            @Override
            public void onTranscriberStart(SpeechTranscriberResponse response) {
                logger.warn("服务端准备好了进行识别");
                logger.warn("task_id: " + response.getTaskId() +
                        ", name: " + response.getName() +
                        ", status: " + response.getStatus());
            }

            /**
             * The server detected the beginning of a sentence.
             */
            @Override
            public void onSentenceBegin(SpeechTranscriberResponse response) {
                logger.warn("服务端检测到了一句话的开始");
                logger.warn("task_id: " + response.getTaskId() +
                        ", name: " + response.getName() +
                        ", status: " + response.getStatus());
            }

            /**
             * The server detected the end of a sentence.
             * The server segments sentences automatically and sends this message when a sentence ends.
             */
            @Override
            public void onSentenceEnd(SpeechTranscriberResponse response) {
                logger.warn("服务端检测到了一句话的结束");
                logger.warn("task_id: " + response.getTaskId() +
                        ", name: " + response.getName() +
                        // Status code 20000000 means the sentence was recognized normally.
                        ", status: " + response.getStatus() +
                        // Sentence index, starting from 1.
                        ", index: " + response.getTransSentenceIndex() +
                        // Current recognition result.
                        ", result: " + response.getTransSentenceText() +
                        // Confidence.
                        ", confidence: " + response.getConfidence() +
                        // Sentence begin time.
                        ", begin_time: " + response.getSentenceBeginTime() +
                        // Audio duration processed so far, in milliseconds.
                        ", time: " + response.getTransSentenceTime());
            }

            /**
             * Final result returned after recognition completes.
             */
            @Override
            public void onTranscriptionComplete(SpeechTranscriberResponse response) {
                logger.warn("识别结束后返回的最终结果");
                logger.warn("task_id: " + response.getTaskId() +
                        ", name: " + response.getName() +
                        ", status: " + response.getStatus() +
                        ", result: " + response.getTransSentenceText());
            }

            /**
             * Failure handling.
             */
            @Override
            public void onFail(SpeechTranscriberResponse response) {
                logger.error("失败处理");
                logger.error("task_id: " + response.getTaskId() +
                        ", status: " + response.getStatus() +
                        ", status_text: " + response.getStatusText());
            }
        });
        transcriber.process();
    }

    /**
     * Start speech-to-text conversion.
     *
     * @param myNumber     my phone number
     * @param callerNumber the caller's phone number
     */
    public HashMap<String, Integer> convert(String myNumber, String callerNumber) {
        // Open two listening ports, one for each of the two pushed audio streams
        ServerSocket serverSocket1 = initServerSocketPort();
        ServerSocket serverSocket2 = initServerSocketPort();
        logger.warn("myNumber监听的端口为:" + serverSocket1.getLocalPort() +
                " callerNumber监听的端口为:" + serverSocket2.getLocalPort());
        // My audio stream
        Thread thread1 = new Thread(() -> {
            AppSpeechTranscriber transcriber =
                    new AppSpeechTranscriber(new SpeechTranscriberListener(myNumber, emqKeeper), serverSocket1);
            transcriber.process();
        });
        // The caller's audio stream
        Thread thread2 = new Thread(() -> {
            AppSpeechTranscriber transcriber =
                    new AppSpeechTranscriber(new SpeechTranscriberListener(callerNumber, emqKeeper), serverSocket2);
            transcriber.process();
        });
        thread1.setUncaughtExceptionHandler(new SubUncaughtExceptionHandler(serverSocket1));
        thread2.setUncaughtExceptionHandler(new SubUncaughtExceptionHandler(serverSocket2));
        thread1.start();
        thread2.start();
        HashMap<String, Integer> map = new HashMap<>();
        map.put(myNumber, serverSocket1.getLocalPort());
        map.put(callerNumber, serverSocket2.getLocalPort());
        return map;
    }

    /**
     * Send the result to MQTT.
     *
     * @param response     recognition result for one sentence
     * @param audioRecords history of recognized sentences
     */
    @Async
    public void sendToMqtt(SpeechTranscriberResponse response, List<AudioRecord> audioRecords) {
        AudioRecord audioRecord = new AudioRecord.AudioRecordBuilder()
                .type(0) // TODO distinguish the speaker role
                .taskId(response.getTaskId())
                .name(response.getName())
                .status(response.getStatus())
                .index(response.getTransSentenceIndex())
                .message(response.getTransSentenceText())
                .confidence(response.getConfidence())
                .beginTime(response.getSentenceBeginTime())
                .time(response.getTransSentenceTime())
                .build();
        audioRecords.set(audioRecord.getIndex(), audioRecord);
        try {
            ObjectMapper objectMapper = new ObjectMapper();
            emqKeeper.getMqttClient().publish(MessageType.RECORD.getName(),
                    objectMapper.writeValueAsString(audioRecord).getBytes(), RuleConfig.DEFAULT_QOS, true);
        } catch (Exception e) {
            e.printStackTrace();
            logger.error("发送音频识别结果消息异常,原因:" + e.getMessage());
        }
    }

    /**
     * Get a ServerSocket on a free port.
     */
    private ServerSocket initServerSocketPort() {
        while (true) {
            try {
                return new ServerSocket(serverPort);
            } catch (IOException exception) {
                serverPort++;
                if (serverPort == 65535)
                    serverPort = 10000;
            }
        }
    }

    /**
     * Extract keywords and send them to MQTT.
     *
     * @param response     recognition result for one sentence
     * @param audioKeyWord keyword result set
     */
    @Async
    public void extractKeyWord(SpeechTranscriberResponse response, AudioKeyWord audioKeyWord) {
        try {
            String result = response.getTransSentenceText();
            // Look for keywords
            for (MessageKeywords messageKeyword : MessageKeywords.values()) {
                for (String keyword : messageKeyword.getKeyword()) {
                    int index = result.indexOf(keyword);
                    // TODO for now, keep everything from the keyword to the end of the sentence
                    if (index != -1) {
                        String keywordValue = result.substring(index);
                        audioKeyWord.getValues().get(messageKeyword.getType()).add(keywordValue);
                    }
                }
            }
            ObjectMapper objectMapper = new ObjectMapper();
            byte[] bytes = objectMapper.writeValueAsString(audioKeyWord.getValues()).getBytes();
            emqKeeper.getMqttClient().publish(MessageType.KEYWORD.getName(), bytes, RuleConfig.DEFAULT_QOS, true);
        } catch (Exception e) {
            e.printStackTrace();
            logger.error("发送音频关键字消息异常,原因:" + e.getMessage());
        }
    }

    /**
     * If a worker thread throws, close its listening port so the port is not left open.
     */
    static class SubUncaughtExceptionHandler implements Thread.UncaughtExceptionHandler {

        private static final Logger logger = LoggerFactory.getLogger(SubUncaughtExceptionHandler.class);

        ServerSocket serverSocket;

        public SubUncaughtExceptionHandler(ServerSocket serverSocket) {
            this.serverSocket = serverSocket;
        }

        @Override
        public void uncaughtException(Thread t, Throwable e) {
            if (serverSocket != null && !serverSocket.isClosed()) {
                try {
                    serverSocket.close();
                    logger.error("子线程出现异常,已关闭音频监听端口。" + e.getMessage());
                } catch (IOException exception) {
                    exception.printStackTrace();
                }
            }
        }
    }
}
amos-boot-module/amos-boot-module-biz/amos-boot-module-jcs-biz/src/main/java/com/yeejoin/amos/boot/module/jcs/biz/audioToText/SocketClient.java (new file, mode 100644)
package com.yeejoin.amos.boot.module.jcs.biz.audioToText;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.scheduling.annotation.Async;
import org.springframework.stereotype.Component;

import java.io.File;
import java.io.FileInputStream;
import java.io.OutputStream;
import java.net.InetSocketAddress;
import java.net.Socket;
import java.util.concurrent.TimeUnit;

/**
 * Simulates a client pushing audio data.
 */
@Component
public class SocketClient {

    private static final Logger logger = LoggerFactory.getLogger(SocketClient.class);

    private static final String[] testFilePath = {
            "D:\\ffmpeg-4.4-full_build-shared\\bin\\out.pcm",
    };

    public static void main(String[] args) {
        SocketClient socketClient = new SocketClient();
        socketClient.process(0, 0);
    }

    @Async
    public void process(int port, int type) {
        if (type < 0)
            type = 0;
        if (type >= testFilePath.length)
            type -= 1;
        Socket socket = new Socket();
        try {
            socket.connect(new InetSocketAddress("127.0.0.1", port));
            OutputStream outputStream = socket.getOutputStream();
            FileInputStream fis = new FileInputStream(new File(testFilePath[type]));
            byte[] b = new byte[4096];
            int len;
            while ((len = fis.read(b)) > 0) {
                logger.info("send data pack length: " + len);
                // Only send the bytes actually read from the file
                outputStream.write(b, 0, len);
                TimeUnit.MILLISECONDS.sleep(400);
            }
            outputStream.flush();
            outputStream.close();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
amos-boot-module/amos-boot-module-biz/amos-boot-module-jcs-biz/src/main/java/com/yeejoin/amos/boot/module/jcs/biz/audioToText/SpeechTranscriberListener.java (new file, mode 100644)
package com.yeejoin.amos.boot.module.jcs.biz.audioToText;

import com.alibaba.nls.client.protocol.asr.SpeechTranscriberResponse;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.yeejoin.amos.component.rule.config.RuleConfig;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.scheduling.annotation.Async;
import org.typroject.tyboot.component.emq.EmqKeeper;

import java.util.ArrayList;
import java.util.List;

/**
 * Callback handler for real-time speech recognition.
 */
public class SpeechTranscriberListener extends com.alibaba.nls.client.protocol.asr.SpeechTranscriberListener {

    private static final Logger logger = LoggerFactory.getLogger(SpeechTranscriberListener.class);

    public static final int RESULT_SUCCESS_CODE = 20000000;

    // Recognized sentences
    List<AudioRecord> audioRecords = new ArrayList<>();
    // Extracted keywords
    AudioKeyWord audioKeyWord = new AudioKeyWord();
    // Source (phone number) of the audio currently being recognized
    private final String number;
    // MQTT client keeper
    private final EmqKeeper emqKeeper;

    public SpeechTranscriberListener(String number, EmqKeeper emqKeeper) {
        this.number = number;
        this.emqKeeper = emqKeeper;
    }

    /**
     * Intermediate result returned while recognition is in progress.
     * Only sent when setEnableIntermediateResult is true.
     */
    @Override
    public void onTranscriptionResultChange(SpeechTranscriberResponse response) {
        logger.warn("语音识别过程中返回的结果");
        logger.warn("task_id: " + response.getTaskId() +
                ", name: " + response.getName() +
                // Status code 20000000 means the sentence was recognized normally.
                ", status: " + response.getStatus() +
                // Sentence index, starting from 1.
                ", index: " + response.getTransSentenceIndex() +
                // Current recognition result.
                ", result: " + response.getTransSentenceText() +
                // Audio duration processed so far, in milliseconds.
                ", time: " + response.getTransSentenceTime());
        if (response.getStatus() == RESULT_SUCCESS_CODE) {
            sendToMqtt(response, audioRecords, number);
            extractKeyWord(response, audioKeyWord, number);
        } else {
            logger.error("异常的相应结果,响应码:" + response.getStatus());
        }
    }

    /**
     * The server is ready to recognize.
     */
    @Override
    public void onTranscriberStart(SpeechTranscriberResponse response) {
        logger.warn("服务端准备好了进行识别");
        logger.warn("task_id: " + response.getTaskId() +
                ", name: " + response.getName() +
                ", status: " + response.getStatus());
    }

    /**
     * The server detected the beginning of a sentence.
     */
    @Override
    public void onSentenceBegin(SpeechTranscriberResponse response) {
        logger.warn("服务端检测到了一句话的开始");
        logger.warn("task_id: " + response.getTaskId() +
                ", name: " + response.getName() +
                ", status: " + response.getStatus());
    }

    /**
     * The server detected the end of a sentence.
     * The server segments sentences automatically and sends this message when a sentence ends.
     */
    @Override
    public void onSentenceEnd(SpeechTranscriberResponse response) {
        logger.warn("服务端检测到了一句话的结束");
        logger.warn("task_id: " + response.getTaskId() +
                ", name: " + response.getName() +
                // Status code 20000000 means the sentence was recognized normally.
                ", status: " + response.getStatus() +
                // Sentence index, starting from 1.
                ", index: " + response.getTransSentenceIndex() +
                // Current recognition result.
                ", result: " + response.getTransSentenceText() +
                // Confidence.
                ", confidence: " + response.getConfidence() +
                // Sentence begin time.
                ", begin_time: " + response.getSentenceBeginTime() +
                // Audio duration processed so far, in milliseconds.
                ", time: " + response.getTransSentenceTime());
    }

    /**
     * Final result returned after recognition completes.
     */
    @Override
    public void onTranscriptionComplete(SpeechTranscriberResponse response) {
        logger.warn("识别结束后返回的最终结果");
        logger.warn("task_id: " + response.getTaskId() +
                ", name: " + response.getName() +
                ", status: " + response.getStatus() +
                ", result: " + response.getTransSentenceText());
    }

    /**
     * Failure handling.
     */
    @Override
    public void onFail(SpeechTranscriberResponse response) {
        logger.error("失败处理");
        logger.error("task_id: " + response.getTaskId() +
                ", status: " + response.getStatus() +
                ", status_text: " + response.getStatusText());
    }

    /**
     * Send the result to MQTT asynchronously so the recognition callback is not blocked.
     *
     * @param response     recognition result for one sentence
     * @param audioRecords history of recognized sentences
     */
    @Async
    public void sendToMqtt(SpeechTranscriberResponse response, List<AudioRecord> audioRecords, String number) {
        AudioRecord audioRecord = new AudioRecord.AudioRecordBuilder()
                .type(response.getTransSentenceIndex() % 2) // TODO distinguish the speaker role
                .taskId(response.getTaskId())
                .name(response.getName())
                .status(response.getStatus())
                .index(response.getTransSentenceIndex())
                .message(response.getTransSentenceText())
                .confidence(response.getConfidence())
                .time(response.getTransSentenceTime())
                .build();
        int index = audioRecord.getIndex() - 1;
        if (index >= audioRecords.size()) {
            audioRecords.add(audioRecord);
        } else {
            audioRecords.set(index, audioRecord);
        }
        try {
            ObjectMapper objectMapper = new ObjectMapper();
            emqKeeper.getMqttClient().publish(MessageType.RECORD.getName() + "_" + number,
                    objectMapper.writeValueAsString(audioRecord).getBytes(), RuleConfig.DEFAULT_QOS, true);
        } catch (Exception e) {
            e.printStackTrace();
            logger.error("发送音频识别结果消息异常,原因:" + e.getMessage());
        }
    }

    /**
     * Extract keywords and send them to MQTT asynchronously so the recognition callback is not blocked.
     *
     * @param response     recognition result for one sentence
     * @param audioKeyWord keyword result set
     */
    @Async
    public void extractKeyWord(SpeechTranscriberResponse response, AudioKeyWord audioKeyWord, String number) {
        try {
            String result = response.getTransSentenceText();
            // Look for keywords
            for (MessageKeywords messageKeyword : MessageKeywords.values()) {
                for (String keyword : messageKeyword.getKeyword()) {
                    int index = result.indexOf(keyword);
                    // TODO for now, keep everything from the keyword to the end of the sentence
                    if (index != -1) {
                        String keywordValue = result.substring(index);
                        audioKeyWord.getValues().get(messageKeyword.getType()).add(keywordValue);
                    }
                }
            }
            ObjectMapper objectMapper = new ObjectMapper();
            byte[] bytes = objectMapper.writeValueAsString(audioKeyWord.getValues()).getBytes();
            emqKeeper.getMqttClient().publish(MessageType.KEYWORD.getName() + "_" + number,
                    bytes, RuleConfig.DEFAULT_QOS, true);
        } catch (Exception e) {
            e.printStackTrace();
            logger.error("发送音频关键字消息异常,原因:" + e.getMessage());
        }
    }
}
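The listener above publishes each recognized sentence to the topic MessageType.RECORD.getName() + "_" + number and the extracted keywords to MessageType.KEYWORD.getName() + "_" + number, but this commit does not include a consumer. The following is only a minimal subscriber sketch for reference: it assumes an MQTT broker reachable at tcp://127.0.0.1:1883, the Eclipse Paho MQTTv3 client on the classpath (the client wrapped by EmqKeeper is not shown here), a made-up phone number, and the literal prefixes "RECORD" and "KEYWORD" as stand-ins for whatever the MessageType enum actually returns.

    import org.eclipse.paho.client.mqttv3.MqttClient;
    import org.eclipse.paho.client.mqttv3.persist.MemoryPersistence;

    import java.nio.charset.StandardCharsets;

    public class AudioRecordSubscriber {
        public static void main(String[] args) throws Exception {
            String number = "13800000000"; // hypothetical phone number
            MqttClient client = new MqttClient("tcp://127.0.0.1:1883", "audio-text-demo", new MemoryPersistence());
            client.connect();
            // sendToMqtt publishes one JSON-serialized AudioRecord per sentence to "<RECORD topic>_<number>"
            client.subscribe("RECORD_" + number, (topic, msg) ->
                    System.out.println(topic + " -> " + new String(msg.getPayload(), StandardCharsets.UTF_8)));
            // extractKeyWord publishes the keyword map to "<KEYWORD topic>_<number>"
            client.subscribe("KEYWORD_" + number, (topic, msg) ->
                    System.out.println(topic + " -> " + new String(msg.getPayload(), StandardCharsets.UTF_8)));
        }
    }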
amos-boot-module/amos-boot-module-biz/amos-boot-module-jcs-biz/src/main/java/com/yeejoin/amos/boot/module/jcs/biz/controller/Audio2TextController.java (new file, mode 100644)
package com.yeejoin.amos.boot.module.jcs.biz.controller;

import com.yeejoin.amos.boot.module.jcs.biz.audioToText.AudioToText;
import com.yeejoin.amos.boot.module.jcs.biz.audioToText.SocketClient;
import io.swagger.annotations.Api;
import io.swagger.annotations.ApiOperation;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.RestController;
import org.typroject.tyboot.core.foundation.enumeration.UserType;
import org.typroject.tyboot.core.restful.doc.TycloudOperation;

import java.util.HashMap;

@RestController
@Api(tags = "语音转文字Api")
@RequestMapping(value = "/Audio2TextController")
public class Audio2TextController {

    @Autowired
    AudioToText audioToText;

    @Autowired
    SocketClient socketClient;

    /**
     * Speech-to-text test, step 1.
     *
     * @param myNumber my phone number
     */
    @TycloudOperation(ApiLevel = UserType.AGENCY)
    @GetMapping("/startConvert")
    @ApiOperation(httpMethod = "GET", value = "测试语音转文字第一步", notes = "number为当前用户的手机号")
    public HashMap<String, Integer> startConvert(@RequestParam String myNumber, @RequestParam String callerNumber) {
        return audioToText.convert(myNumber, callerNumber);
    }

    /**
     * Speech-to-text test, step 2.
     */
    @TycloudOperation(ApiLevel = UserType.AGENCY)
    @GetMapping("/startSendAudio")
    @ApiOperation(httpMethod = "GET", value = "测试语音转文字第二步", notes = "测试语音转文字第二步")
    public String startSendAudio(@RequestParam int port, Integer type) {
        if (type == null)
            type = 0;
        socketClient.process(port, type);
        return "success";
    }

    /**
     * Step 1: on receiving a convert request, open two ServerSockets listening on different ports.
     * Step 2: call the voice-fusion system's API, passing the two listening ports and this host's IP address.
     * Step 3: when a ServerSocket receives data, push it to the Alibaba Cloud speech recognition service.
     * Step 4: the recognition callbacks publish the results to the MQTT broker through the MQTT client.
     * Step 5: the frontend subscribes to the messages and displays them.
     *
     * @param cid     call id
     * @param myPhone my phone number
     * @param caller  caller's phone number
     */
    @TycloudOperation(ApiLevel = UserType.AGENCY)
    @GetMapping("/startConvertText")
    @ApiOperation(httpMethod = "GET", value = "静听电话回调后端开始转文字", notes = "静听电话回调后端开始转文字")
    public void startConvertText(String cid, String myPhone, String caller) {
    }
}
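Putting the two test endpoints together, the intended flow is: call /startConvert to allocate one listening port per phone number, then call /startSendAudio with one of the returned ports so that SocketClient streams the local test PCM file into it. The sketch below is only a rough smoke test of that flow; the base URL is an assumption (this commit does not show the service's host, port, or any gateway prefix), and whatever authentication @TycloudOperation enforces is omitted.

    import java.net.URI;
    import java.net.http.HttpClient;
    import java.net.http.HttpRequest;
    import java.net.http.HttpResponse;

    public class Audio2TextSmokeTest {
        public static void main(String[] args) throws Exception {
            // Hypothetical base URL; adjust host, port, and prefix to the real deployment
            String base = "http://127.0.0.1:8080/Audio2TextController";
            HttpClient http = HttpClient.newHttpClient();

            // Step 1: allocate the two listening ports; the body maps each number to its port
            HttpResponse<String> ports = http.send(
                    HttpRequest.newBuilder(URI.create(
                            base + "/startConvert?myNumber=13800000000&callerNumber=13900000000")).GET().build(),
                    HttpResponse.BodyHandlers.ofString());
            System.out.println("allocated ports: " + ports.body());

            // Step 2: ask SocketClient to stream the local test PCM file to one of those ports
            int port = 10001; // replace with a port taken from the step-1 response
            HttpResponse<String> send = http.send(
                    HttpRequest.newBuilder(URI.create(
                            base + "/startSendAudio?port=" + port + "&type=0")).GET().build(),
                    HttpResponse.BodyHandlers.ofString());
            System.out.println(send.body());
        }
    }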
amos-boot-utils/amos-boot-utils-speech/src/main/java/com/yeejoin/amos/speech/SpeechTranscriber.java → amos-boot-utils/amos-boot-utils-speech/src/main/java/com/yeejoin/amos/speech/AppSpeechTranscriber.java (renamed)
package com.yeejoin.amos.speech;

import com.alibaba.nls.client.protocol.InputFormatEnum;
import com.alibaba.nls.client.protocol.SampleRateEnum;
import com.alibaba.nls.client.protocol.asr.SpeechTranscriber;
import com.alibaba.nls.client.protocol.asr.SpeechTranscriberListener;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.io.InputStream;
import java.net.ServerSocket;
import java.net.Socket;
import java.util.concurrent.TimeUnit;

/**
 * ...
 * Supported input format: PCM (uncompressed PCM or WAV), 16-bit samples, mono.
 * Supported sample rates: 8000 Hz and 16000 Hz.
 */
public class AppSpeechTranscriber {

    private static final Logger logger = LoggerFactory.getLogger(AppSpeechTranscriber.class);

    // Recognition callback
    private final SpeechTranscriberListener listener;
    private final ServerSocket serverSocket;

    public AppSpeechTranscriber(SpeechTranscriberListener listener, ServerSocket serverSocket) {
        this.listener = listener;
        this.serverSocket = serverSocket;
    }

    /**
     * Start speech recognition.
     */
    public void process() {
        SpeechTranscriber transcriber = null;
        try {
            // Start the ServerSocket and wait for audio data; only one connection is accepted
            logger.warn("serverSocket已启动,地址:" + serverSocket.getInetAddress().getHostAddress()
                    + "监听端口:" + serverSocket.getLocalPort() + " 等待语音融合系统推送数据...");
            Socket socket = serverSocket.accept();
            logger.warn("收到用户连接请求,开始读取数据");
            // Create the transcriber instance and open the connection
            transcriber = new SpeechTranscriber(AppNslClient.instance(), listener);
            // Set the recognition parameters
            setParam(transcriber);
            // Start the task
            transcriber.start();
            InputStream inputStream = socket.getInputStream();
            byte[] b = new byte[4096];
            int len;
            while ((len = inputStream.read(b)) > 0) {
                logger.info("receive data pack length: " + len);
                transcriber.send(b, len);
                // When simulating a real-time stream by reading a local file, reading is fast and a
                // sleep is needed here; no sleep is needed when the audio really arrives in real time.
                TimeUnit.MILLISECONDS.sleep(400);
            }
            // Tell the server that all audio has been sent, then wait for it to finish processing
            long now = System.currentTimeMillis();
            logger.info("ASR wait for complete");
            socket.close();
            // Stop the task
            transcriber.stop();
            logger.info("ASR latency : " + (System.currentTimeMillis() - now) + " ms");
            logger.warn("语音转文字已结束");
        } catch (Exception e) {
            logger.error(e.getMessage());
        } finally {
            if (null != transcriber) {
                transcriber.close();
            }
            if (!serverSocket.isClosed()) {
                try {
                    serverSocket.close();
                } catch (IOException exception) {
                    exception.printStackTrace();
                    logger.error(exception.getMessage());
                }
            }
        }
    }

    /**
     * Set the recognition parameters.
     */
    private void setParam(SpeechTranscriber transcriber) {
        transcriber.setAppKey(SpeechConfig.AppKey);
        // Audio input format.
        transcriber.setFormat(InputFormatEnum.PCM);
        // Audio sample rate.
        transcriber.setSampleRate(SampleRateEnum.SAMPLE_RATE_16K);
        // Whether to return intermediate results.
        transcriber.setEnableIntermediateResult(true);
        // Whether to add punctuation to the returned results.
        transcriber.setEnablePunctuation(true);
        // Whether to apply inverse text normalization, e.g. return 一百 as 100.
        transcriber.setEnableITN(true);
        // VAD sentence-break silence threshold. Default: 800 ms, valid range: 200 ms to 2000 ms.
        transcriber.addCustomedParam("max_sentence_silence", 500);
        // Whether to use semantic sentence segmentation.
        //transcriber.addCustomedParam("enable_semantic_sentence_detection",false);
        // Whether to enable disfluency removal.
        //transcriber.addCustomedParam("disfluency",true);
        // Whether to enable word mode.
        transcriber.addCustomedParam("enable_words", true);
        // VAD noise threshold, between -1 and +1, e.g. -0.9, -0.8, 0.2, 0.9.
        // Closer to -1: more likely to judge input as speech, so more noise may be misrecognized as speech.
        // Closer to +1: more likely to judge input as noise, so more speech may be rejected as noise.
        // This is an advanced parameter; tune it carefully and test thoroughly.
        //transcriber.addCustomedParam("speech_noise_threshold",0.3);
        // Id of a trained custom language model.
        //transcriber.addCustomedParam("customization_id","你的定制语言模型id");
        // Id of a trained custom hot-word list.
        //transcriber.addCustomedParam("vocabulary_id","你的定制热词id");
        // Whether to ignore single-sentence timeouts.
        transcriber.addCustomedParam("enable_ignore_sentence_timeout", false);
        // Post-processing after VAD sentence break.
        //transcriber.addCustomedParam("enable_vad_unify_post",false);
        // These settings are serialized to JSON, sent to the server, and confirmed when the task starts.
    }
}
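AppSpeechTranscriber builds its transcriber from AppNslClient.instance(), but that class (and SpeechConfig beyond AppKey) is not part of this commit. Purely as a hypothetical sketch of what such a singleton might look like: it assumes the AccessToken and NlsClient classes from the Alibaba NLS Java SDK, invents SpeechConfig.AccessKeyId and SpeechConfig.AccessKeySecret as field names, and uses the SDK's public Shanghai-region gateway URL.

    package com.yeejoin.amos.speech;

    import com.alibaba.nls.client.AccessToken;
    import com.alibaba.nls.client.protocol.NlsClient;

    public class AppNslClient {

        private static NlsClient client;

        // Lazily create a single shared NlsClient; one instance per process is enough for the transcriber.
        public static synchronized NlsClient instance() {
            if (client == null) {
                try {
                    // SpeechConfig.AccessKeyId / AccessKeySecret are hypothetical field names
                    AccessToken token = new AccessToken(SpeechConfig.AccessKeyId, SpeechConfig.AccessKeySecret);
                    token.apply(); // exchange the AccessKey pair for a temporary token
                    client = new NlsClient("wss://nls-gateway.cn-shanghai.aliyuncs.com/ws/v1", token.getToken());
                } catch (Exception e) {
                    throw new IllegalStateException("无法创建NlsClient", e);
                }
            }
            return client;
        }
    }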
amos-boot-utils/amos-boot-utils-speech/src/test/java/com/yejoin/amos/SpeechApplicationTests.java
@@ -4,7 +4,7 @@: the import of com.yeejoin.amos.speech.SpeechTranscriber is replaced by the renamed class, leaving:

    import com.alibaba.nls.client.protocol.asr.SpeechTranscriberResponse;
    import com.yeejoin.amos.speech.SpeechFlashRecognizer;
    import com.yeejoin.amos.speech.SpeechRecognizer;
    import com.yeejoin.amos.speech.AppSpeechTranscriber;
    import okhttp3.Call;
    import okhttp3.Callback;
    import okhttp3.Response;

In the remaining hunks (@@ -23,14 +23,14 @@ through @@ -115,7 +115,7 @@) the body of testSpeechTranscriber() is commented out: the old test built a file-based SpeechTranscriber over a local PCM file with an anonymous SpeechTranscriberListener (onTranscriptionResultChange, onTranscriberStart, onSentenceBegin, onSentenceEnd, onTranscriptionComplete, onFail) and called transcriber.process(). The new version wraps that body in a block comment, closing and reopening the comment around each inner javadoc, and the commented-out code now refers to the renamed AppSpeechTranscriber:

    @Test
    void testSpeechTranscriber() {
        /*
        // This test simulates a real-time stream by sending a local file. In real use, capture or
        // receive the audio stream as it arrives and send it to the ASR service.
        String fileLink = "https://gw.alipayobjects.com/os/bmw-prod/0574ee2e-f494-45a5-820f-63aee583045a.wav";
        // Download the fileLink above and point filepath at the local copy to test.
        String filepath = "D:\\ffmpeg-4.4-full_build-shared\\bin\\out.pcm";
        AppSpeechTranscriber transcriber = new AppSpeechTranscriber(
                new SpeechTranscriberListener() {
                    ...
                });
        transcriber.process();
        */
    }