Commit b1bdd1a2 authored by helinlin's avatar helinlin

添加阿里云音频识别接口

parent 2b014717
......@@ -10,6 +10,7 @@
<artifactId>amos-boot-module-jcs-biz</artifactId>
<dependencies>
<dependency>
<groupId>com.amosframework.boot</groupId>
<artifactId>amos-boot-module-jcs-api</artifactId>
......@@ -45,6 +46,12 @@
<artifactId>spire.doc</artifactId>
<version>4.9.0</version>
</dependency>
<dependency>
<groupId>com.amosframework.boot</groupId>
<artifactId>amos-boot-utils-speech</artifactId>
<version>1.0.0</version>
<scope>compile</scope>
</dependency>
</dependencies>
</project>
package com.yeejoin.amos.boot.module.jcs.biz.audioToText;
import lombok.Data;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
/**
 * Keyword extraction result set for audio transcripts: maps each keyword type
 * (the {@code type} string of {@link MessageKeywords}) to the set of values
 * extracted so far for that type.
 */
@Data
public class AudioKeyWord {

    // keyword type -> extracted values; pre-populated with an empty set per type
    // so consumers never see a missing key
    private Map<String, Set<String>> values;

    public AudioKeyWord() {
        values = new HashMap<>();
        // address
        values.put(MessageKeywords.LOCATION.getType(), new HashSet<>());
        // contact person
        values.put(MessageKeywords.CONTACT.getType(), new HashSet<>());
        // contact phone number
        values.put(MessageKeywords.CONTACT_NUMBER.getType(), new HashSet<>());
        // involved unit
        values.put(MessageKeywords.UNIT.getType(), new HashSet<>());
        // key part/location
        values.put(MessageKeywords.PART.getType(), new HashSet<>());
    }

    // NOTE: getValues()/setValues() are generated by Lombok's @Data annotation;
    // the previous hand-written duplicates were redundant and have been removed.
}
\ No newline at end of file
package com.yeejoin.amos.boot.module.jcs.biz.audioToText;
import lombok.Builder;
import lombok.Data;
/**
 * One recognized sentence produced by the audio-to-text service.
 * Immutable value object built via the Lombok-generated builder.
 */
@Data
@Builder
public class AudioRecord {
/**
 * Speaking role (speaker id). Currently always 0 — see TODO in AudioToText#sendToMqtt.
 */
private int type;
/**
 * Response status code; 20000000 indicates a normal (successful) result.
 */
private int status;
/**
 * Sentence index; the recognition service numbers sentences starting from 1.
 */
private int index;
/**
 * Id of the transcription task this sentence belongs to.
 */
private String taskId;
/**
 * Event/state name as reported by the recognition service.
 */
private String name;
/***
 * Recognized text of the sentence.
 */
private String message;
/**
 * Confidence of the result, 0.0-1.0; higher means more confident.
 */
private double confidence;
/***
 * Timestamp of the corresponding sentenceBegin event.
 */
private long beginTime;
/**
 * Audio duration processed so far, in milliseconds.
 */
private long time;
}
package com.yeejoin.amos.boot.module.jcs.biz.audioToText;
import com.alibaba.nls.client.protocol.asr.SpeechTranscriberListener;
import com.alibaba.nls.client.protocol.asr.SpeechTranscriberResponse;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.yeejoin.amos.component.rule.config.RuleConfig;
import com.yeejoin.amos.speech.SpeechTranscriber;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.scheduling.annotation.Async;
import org.springframework.stereotype.Component;
import org.typroject.tyboot.component.emq.EmqKeeper;

import java.io.File;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
@Component
public class AudioToText {

    private static final Logger logger = LoggerFactory.getLogger(AudioToText.class);

    /** Status code the recognition service returns for a successful result. */
    public static final int RESULT_SUCCESS_CODE = 20000000;

    /** Shared JSON serializer; ObjectMapper is thread-safe and costly to build per call. */
    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

    @Autowired
    private EmqKeeper emqKeeper;

    /**
     * Transcribes a local PCM audio file with the Aliyun real-time speech transcriber.
     * Every successfully recognized intermediate sentence is published to MQTT and
     * scanned for keywords.
     */
    public void convert() {
        List<AudioRecord> audioRecords = new ArrayList<>();
        AudioKeyWord audioKeyWord = new AudioKeyWord();
        // TODO(review): hard-coded local test path — should come from configuration.
        String filepath = "D:\\ffmpeg-4.4-full_build-shared\\bin\\out.pcm";
        SpeechTranscriber transcriber = new SpeechTranscriber(new File(filepath), new SpeechTranscriberListener() {

            /**
             * Intermediate results returned while recognition is in progress.
             * Only delivered when setEnableIntermediateResult is true.
             */
            @Override
            public void onTranscriptionResultChange(SpeechTranscriberResponse response) {
                logger.warn("语音识别过程中返回的结果");
                logger.warn("task_id: " + response.getTaskId() +
                        ", name: " + response.getName() +
                        // Status code "20000000" means the sentence was recognized normally.
                        ", status: " + response.getStatus() +
                        // Sentence index, increasing from 1.
                        ", index: " + response.getTransSentenceIndex() +
                        // Current recognition result text.
                        ", result: " + response.getTransSentenceText() +
                        // Audio duration processed so far, in milliseconds.
                        ", time: " + response.getTransSentenceTime());
                if (response.getStatus() == RESULT_SUCCESS_CODE) {
                    // NOTE(review): @Async has no effect on these self-invocations —
                    // Spring only intercepts calls through the proxy, so both methods
                    // run synchronously on the transcriber callback thread.
                    sendToMqtt(response, audioRecords);
                    extractKeyWord(response, audioKeyWord);
                } else {
                    logger.error("异常的相应结果,响应码:" + response.getStatus());
                }
            }

            /**
             * The server is ready to start recognition.
             */
            @Override
            public void onTranscriberStart(SpeechTranscriberResponse response) {
                logger.warn("服务端准备好了进行识别");
                logger.warn("task_id: " + response.getTaskId()
                        + ", name: " + response.getName()
                        + ", status: " + response.getStatus());
            }

            /**
             * The server detected the beginning of a sentence.
             */
            @Override
            public void onSentenceBegin(SpeechTranscriberResponse response) {
                logger.warn("服务端检测到了一句话的开始");
                logger.warn("task_id: " + response.getTaskId()
                        + ", name: " + response.getName()
                        + ", status: " + response.getStatus());
            }

            /**
             * The server detected the end of a sentence. The server segments
             * sentences automatically and sends this event per completed sentence.
             */
            @Override
            public void onSentenceEnd(SpeechTranscriberResponse response) {
                logger.warn("服务端检测到了一句话的结束");
                logger.warn("task_id: " + response.getTaskId() +
                        ", name: " + response.getName() +
                        // Status code "20000000" means the sentence was recognized normally.
                        ", status: " + response.getStatus() +
                        // Sentence index, increasing from 1.
                        ", index: " + response.getTransSentenceIndex() +
                        // Current recognition result text.
                        ", result: " + response.getTransSentenceText() +
                        // Confidence of the result.
                        ", confidence: " + response.getConfidence() +
                        // Begin time of the sentence.
                        ", begin_time: " + response.getSentenceBeginTime() +
                        // Audio duration processed so far, in milliseconds.
                        ", time: " + response.getTransSentenceTime());
            }

            /**
             * Final result returned after recognition completes.
             */
            @Override
            public void onTranscriptionComplete(SpeechTranscriberResponse response) {
                logger.warn("识别结束后返回的最终结果");
                logger.warn("task_id: " + response.getTaskId()
                        + ", name: " + response.getName()
                        + ", status: " + response.getStatus()
                        + ",result:" + response.getTransSentenceText());
            }

            /**
             * Failure handling.
             */
            @Override
            public void onFail(SpeechTranscriberResponse response) {
                logger.error("失败处理");
                logger.error("task_id: " + response.getTaskId()
                        + ", status: " + response.getStatus()
                        + ", status_text: " + response.getStatusText());
            }
        });
        transcriber.process();
    }

    /**
     * Stores one recognized sentence in the history list and publishes it to MQTT.
     *
     * @param response     sentence recognition response from the transcriber
     * @param audioRecords history of recognized sentences, addressed by sentence index
     */
    @Async
    public void sendToMqtt(SpeechTranscriberResponse response, List<AudioRecord> audioRecords) {
        AudioRecord audioRecord = new AudioRecord.AudioRecordBuilder()
                .type(0) // TODO distinguish the speaking role
                .taskId(response.getTaskId())
                .name(response.getName())
                .status(response.getStatus())
                .index(response.getTransSentenceIndex())
                .message(response.getTransSentenceText())
                .confidence(response.getConfidence())
                .beginTime(response.getSentenceBeginTime())
                .time(response.getTransSentenceTime())
                .build();
        // Sentence indexes start at 1, and List#set on a fresh ArrayList threw
        // IndexOutOfBoundsException. Grow the list first so set() is always legal.
        int index = audioRecord.getIndex();
        while (audioRecords.size() <= index) {
            audioRecords.add(null);
        }
        audioRecords.set(index, audioRecord);
        try {
            emqKeeper.getMqttClient().publish(
                    MessageType.RECORD.getName(),
                    // explicit charset: platform-default getBytes() is not portable
                    OBJECT_MAPPER.writeValueAsString(audioRecord).getBytes(StandardCharsets.UTF_8),
                    RuleConfig.DEFAULT_QOS, true);
        } catch (Exception e) {
            // Log with the cause so the stack trace lands in the log, instead of printStackTrace().
            logger.error("发送音频识别结果消息异常,原因:" + e.getMessage(), e);
        }
    }

    /**
     * Extracts keywords from one recognized sentence and publishes the accumulated
     * keyword result set to MQTT.
     *
     * @param response     sentence recognition response from the transcriber
     * @param audioKeyWord accumulated keyword result set
     */
    @Async
    public void extractKeyWord(SpeechTranscriberResponse response, AudioKeyWord audioKeyWord) {
        try {
            String result = response.getTransSentenceText();
            // Scan the sentence for every known trigger keyword.
            for (MessageKeywords messageKeyword : MessageKeywords.values()) {
                for (String keyword : messageKeyword.getKeyword()) {
                    int index = result.indexOf(keyword);
                    // TODO for now the value is everything from the keyword to the end of the sentence
                    if (index != -1) {
                        String keywordValue = result.substring(index);
                        audioKeyWord.getValues().get(messageKeyword.getType()).add(keywordValue);
                    }
                }
            }
            byte[] bytes = OBJECT_MAPPER.writeValueAsString(audioKeyWord.getValues())
                    .getBytes(StandardCharsets.UTF_8);
            emqKeeper.getMqttClient().publish(
                    MessageType.KEYWORD.getName(),
                    bytes,
                    RuleConfig.DEFAULT_QOS, true);
        } catch (Exception e) {
            // Log with the cause so the stack trace lands in the log, instead of printStackTrace().
            logger.error("发送音频关键字消息异常,原因:" + e.getMessage(), e);
        }
    }
}
package com.yeejoin.amos.boot.module.jcs.biz.audioToText;
/**
 * MQTT message types for the speech-to-text pipeline.
 * The actual topic published appends [_phoneNumber] — the answering user's
 * phone number — to distinguish concurrent calls.
 */
public enum MessageType {

    /** Per-sentence recognition records. */
    RECORD("audioRecord"),

    /** Accumulated keyword result sets. */
    KEYWORD("audioKeyWord");

    /** Base MQTT topic name for this message type. */
    private String name;

    MessageType(String name) {
        this.name = name;
    }

    public String getName() {
        return this.name;
    }

    public void setName(String name) {
        this.name = name;
    }
}
/**
 * Lookup table of trigger keywords for speech-to-text extraction. Each constant
 * pairs a result-type key (used as the map key in AudioKeyWord) with the phrases
 * that introduce a value of that type in a recognized sentence.
 */
enum MessageKeywords {
    // address
    LOCATION("location", new String[]{"地址", "位置", "地点", "在"}),
    // contact person
    CONTACT("contact", new String[]{"联系人", "联系", "在"}),
    // contact phone number
    CONTACT_NUMBER("contactNumber", new String[]{"号码是", "号码", "手机是", "手机"}),
    // involved unit
    UNIT("unit", new String[]{"事发单位", "单位"}),
    // key part/location
    PART("part", new String[]{"重点部位", "部位"});

    /** Result-type key. */
    private String type;
    /** Trigger phrases for this type. */
    private String[] keyword;

    MessageKeywords(String type, String[] keyword) {
        this.type = type;
        this.keyword = keyword;
    }

    public String getType() {
        return type;
    }

    public void setType(String type) {
        this.type = type;
    }

    /**
     * @return a defensive copy — the backing array belongs to a process-wide enum
     * constant and must not be mutable through the getter (the original leaked it).
     */
    public String[] getKeyword() {
        return keyword.clone();
    }

    public void setKeyword(String[] keyword) {
        // Copy on the way in as well, so later caller-side mutation cannot
        // silently change the shared enum state.
        this.keyword = keyword == null ? null : keyword.clone();
    }
}
......@@ -42,4 +42,4 @@ public class AppOkHttpClient {
.build();
client.newCall(request).enqueue(callback);
}
}
\ No newline at end of file
}
......@@ -52,16 +52,15 @@ public class SpeechTranscriber {
//是否生成并返回标点符号。
transcriber.setEnablePunctuation(true);
//是否将返回结果规整化,比如将一百返回为100。
transcriber.setEnableITN(false);
transcriber.setEnableITN(true);
//设置vad断句参数。默认值:800ms,有效值:200ms~2000ms。
//transcriber.addCustomedParam("max_sentence_silence", 600);
transcriber.addCustomedParam("max_sentence_silence", 500);
//设置是否语义断句。
//transcriber.addCustomedParam("enable_semantic_sentence_detection",false);
//设置是否开启顺滑。
//transcriber.addCustomedParam("disfluency",true);
//设置是否开启词模式。
//transcriber.addCustomedParam("enable_words",true);
transcriber.addCustomedParam("enable_words",true);
//设置vad噪音阈值参数,参数取值为-1~+1,如-0.9、-0.8、0.2、0.9。
//取值越趋于-1,判定为语音的概率越大,亦即有可能更多噪声被当成语音被误识别。
//取值越趋于+1,判定为噪音的越多,亦即有可能更多语音段被当成噪音被拒绝识别。
......@@ -84,7 +83,7 @@ public class SpeechTranscriber {
logger.info("send data pack length: " + len);
transcriber.send(b, len);
//本案例用读取本地文件的形式模拟实时获取语音流并发送的,因为读取速度较快,这里需要设置sleep,如果实时获取语音则无需设置sleep
TimeUnit.MILLISECONDS.sleep(1000);
TimeUnit.MILLISECONDS.sleep(400);
}
//通知服务端语音数据发送完毕,等待服务端处理完成。
long now = System.currentTimeMillis();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment