Commit 8c96d81b authored by helinlin's avatar helinlin

添加阿里云音频识别demo

parent da3aa95c
......@@ -34,6 +34,11 @@
<artifactId>nls-sdk-transcriber</artifactId>
<version>2.2.1</version>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-test</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
</project>
package com.yeejoin.amos.speech;
import okhttp3.Call;
import okhttp3.Callback;
import okhttp3.Response;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.IOException;
import java.util.HashMap;
/**
* 不支持免费用户测试
* 录音文件识别极速版(支持提交音频文件识别,文件大小,文件时长有限制)
* <p>
* 音频格式:支持AAC/MP3/OPUS/WAV格式编码的音频。
......@@ -35,45 +33,21 @@ public class SpeechFlashRecognizer {
//识别回调函数
private final Callback callback;
/**
* 使用示例
* 不支持试用,需要付费
* 不支持试用,需要付费
* 不支持试用,需要付费
*/
public static void main(String[] args) {
//String fileName = SpeechRecognizerRestfulDemo.class.getClassLoader().getResource("./nls-sample-16k.wav").getPath();
// 重要:此处用一个本地文件来模拟发送实时流数据,实际使用时,您可以从某处实时采集或接收语音流并发送到ASR服务端。
String fileName = "D:\\ffmpeg-4.4-full_build-shared\\bin\\test.mp3";
String format = "mp3";
int sampleRate = 16000;
SpeechFlashRecognizer speechFlashRecognizer = new SpeechFlashRecognizer(new File(fileName), format, sampleRate, new Callback() {
@Override
public void onFailure(Call call, IOException e) {
logger.error("语音识别失败,原因:" + e.getMessage());
}
@Override
public void onResponse(Call call, Response response) throws IOException {
assert response.body() != null;
logger.warn("语音识别结果:" + response.body().string());
}
});
speechFlashRecognizer.process();
}
public SpeechFlashRecognizer(File speechFile, String format, int sampleRate, Callback callback) {
if (speechFile != null && speechFile.exists() && speechFile.isFile()) {
public SpeechFlashRecognizer(File speechFile, int sampleRate, Callback callback) {
if (speechFile == null || !speechFile.exists() || !speechFile.isFile()) {
throw new IllegalArgumentException("待识别的文件存在异常");
}
if (null == format || "".equals(format)) {
throw new IllegalArgumentException("音频文件格式不能为空");
}
if (sampleRate == 0) {
throw new IllegalArgumentException("音频采样率不能为0");
}
String[] split = speechFile.getName().split("\\.");
if (split.length != 0) {
this.format = split[split.length - 1];
} else {
throw new IllegalArgumentException("音频文件格式提取失败");
}
this.speechFile = speechFile;
this.format = format;
this.sampleRate = sampleRate;
this.callback = callback;
}
......
......@@ -14,6 +14,7 @@ import org.slf4j.LoggerFactory;
import java.util.concurrent.TimeUnit;
/**
* 不支持提交文件
* 录音文件识别普通版(提供外网访问的音频文件地址)
* <p>
* 识别的文件需要提交基于HTTP可访问的URL地址,可以通过URL访问,不支持提交本地文件
......@@ -59,25 +60,6 @@ public class SpeechRecognizer {
IAcsClient client;
private static final Logger logger = LoggerFactory.getLogger(SpeechRecognizer.class);
public static void main(String[] args) throws Exception {
String fileLink = "https://gw.alipayobjects.com/os/bmw-prod/0574ee2e-f494-45a5-820f-63aee583045a.wav";
SpeechRecognizer speechRecognizer = new SpeechRecognizer();
// 第一步:提交录音文件识别请求,获取任务ID用于后续的识别结果轮询。
String taskId = speechRecognizer.submitFileTransRequest(fileLink);
if (taskId != null) {
logger.info("录音文件识别请求成功,task_id: " + taskId);
} else {
logger.error("录音文件识别请求失败!");
return;
}
// 第二步:根据任务ID轮询识别结果。
String result = speechRecognizer.getFileTransResult(taskId);
if (result != null) {
logger.info("录音文件识别结果查询成功:" + result);
} else {
logger.error("录音文件识别结果查询失败!");
}
}
public SpeechRecognizer() {
// 设置endpoint
......@@ -92,7 +74,7 @@ public class SpeechRecognizer {
}
/**
* 提交录音文件
* 提交录音文件地址
*/
public String submitFileTransRequest(String fileLink) {
/**
......
......@@ -26,109 +26,8 @@ public class SpeechTranscriber {
//识别回调函数
private final SpeechTranscriberListener listener;
/**
* 使用示例
* 提供音频留地址
*/
public static void main(String[] args) {
//本案例使用本地文件模拟发送实时流数据。您在实际使用时,可以实时采集或接收语音流并发送到ASR服务端。
String fileLink = "https://gw.alipayobjects.com/os/bmw-prod/0574ee2e-f494-45a5-820f-63aee583045a.wav";
//将上面fileLink文件下载到本地后,替换filepath为本地地址测试
String filepath = "D:\\ffmpeg-4.4-full_build-shared\\bin\\test1.wav";
SpeechTranscriber transcriber = new SpeechTranscriber(new File(filepath), new SpeechTranscriberListener() {
/**
* 语音识别过程中返回的结果。仅当setEnableIntermediateResult为true时,才会返回该消息。
*/
@Override
public void onTranscriptionResultChange(SpeechTranscriberResponse response) {
logger.warn("语音识别过程中返回的结果");
logger.warn("task_id: " + response.getTaskId() +
", name: " + response.getName() +
//状态码“20000000”表示正常识别。
", status: " + response.getStatus() +
//句子编号,从1开始递增。
", index: " + response.getTransSentenceIndex() +
//当前的识别结果。
", result: " + response.getTransSentenceText() +
//当前已处理的音频时长,单位为毫秒。
", time: " + response.getTransSentenceTime());
}
/**
* 服务端准备好了进行识别
*/
@Override
public void onTranscriberStart(SpeechTranscriberResponse response) {
logger.warn("服务端准备好了进行识别");
logger.warn("task_id: " + response.getTaskId()
+ ", name: " + response.getName()
+ ", status: " + response.getStatus());
}
/**
* 服务端检测到了一句话的开始
*/
@Override
public void onSentenceBegin(SpeechTranscriberResponse response) {
logger.warn("服务端检测到了一句话的开始");
logger.warn("task_id: " + response.getTaskId()
+ ", name: " + response.getName()
+ ", status: " + response.getStatus());
}
/**
* 服务端检测到了一句话的结束
* 识别出一句话。服务端会智能断句,当识别到一句话结束时会返回此消息。
*/
@Override
public void onSentenceEnd(SpeechTranscriberResponse response) {
logger.warn("服务端检测到了一句话的结束");
logger.warn("task_id: " + response.getTaskId() +
", name: " + response.getName() +
//状态码“20000000”表示正常识别。
", status: " + response.getStatus() +
//句子编号,从1开始递增。
", index: " + response.getTransSentenceIndex() +
//当前的识别结果。
", result: " + response.getTransSentenceText() +
//置信度
", confidence: " + response.getConfidence() +
//开始时间
", begin_time: " + response.getSentenceBeginTime() +
//当前已处理的音频时长,单位为毫秒。
", time: " + response.getTransSentenceTime());
}
/**
* 识别结束后返回的最终结果
*/
@Override
public void onTranscriptionComplete(SpeechTranscriberResponse response) {
logger.warn("识别结束后返回的最终结果");
logger.warn("task_id: " + response.getTaskId()
+ ", name: " + response.getName()
+ ", status: " + response.getStatus()
+ ",result:" + response.getTransSentenceText());
}
/**
* 失败处理
*/
@Override
public void onFail(SpeechTranscriberResponse response) {
logger.error("失败处理");
logger.error("task_id: " + response.getTaskId()
+ ", status: " + response.getStatus()
+ ", status_text: " + response.getStatusText());
}
});
transcriber.process();
}
public SpeechTranscriber(File speechFile, SpeechTranscriberListener listener) {
if (speechFile != null && speechFile.exists() && speechFile.isFile()) {
if (speechFile == null || !speechFile.exists() || !speechFile.isFile()) {
this.speechFile = speechFile;
this.listener = listener;
} else {
......
package com.yejoin.amos;
import com.alibaba.nls.client.protocol.asr.SpeechTranscriberListener;
import com.alibaba.nls.client.protocol.asr.SpeechTranscriberResponse;
import com.yeejoin.amos.speech.SpeechFlashRecognizer;
import com.yeejoin.amos.speech.SpeechRecognizer;
import com.yeejoin.amos.speech.SpeechTranscriber;
import okhttp3.Call;
import okhttp3.Callback;
import okhttp3.Response;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.IOException;
public class SpeechApplicationTests {
private static final Logger logger = LoggerFactory.getLogger(SpeechApplicationTests.class);
/**
* 实时语音识别使用示例
*/
@Test
void testSpeechTranscriber() {
//本案例使用本地文件模拟发送实时流数据。您在实际使用时,可以实时采集或接收语音流并发送到ASR服务端。
String fileLink = "https://gw.alipayobjects.com/os/bmw-prod/0574ee2e-f494-45a5-820f-63aee583045a.wav";
//将上面fileLink文件下载到本地后,替换filepath为本地地址测试
String filepath = "D:\\ffmpeg-4.4-full_build-shared\\bin\\test1.wav";
SpeechTranscriber transcriber = new SpeechTranscriber(new File(filepath), new SpeechTranscriberListener() {
/**
* 语音识别过程中返回的结果。仅当setEnableIntermediateResult为true时,才会返回该消息。
*/
@Override
public void onTranscriptionResultChange(SpeechTranscriberResponse response) {
logger.warn("语音识别过程中返回的结果");
logger.warn("task_id: " + response.getTaskId() +
", name: " + response.getName() +
//状态码“20000000”表示正常识别。
", status: " + response.getStatus() +
//句子编号,从1开始递增。
", index: " + response.getTransSentenceIndex() +
//当前的识别结果。
", result: " + response.getTransSentenceText() +
//当前已处理的音频时长,单位为毫秒。
", time: " + response.getTransSentenceTime());
}
/**
* 服务端准备好了进行识别
*/
@Override
public void onTranscriberStart(SpeechTranscriberResponse response) {
logger.warn("服务端准备好了进行识别");
logger.warn("task_id: " + response.getTaskId()
+ ", name: " + response.getName()
+ ", status: " + response.getStatus());
}
/**
* 服务端检测到了一句话的开始
*/
@Override
public void onSentenceBegin(SpeechTranscriberResponse response) {
logger.warn("服务端检测到了一句话的开始");
logger.warn("task_id: " + response.getTaskId()
+ ", name: " + response.getName()
+ ", status: " + response.getStatus());
}
/**
* 服务端检测到了一句话的结束
* 识别出一句话。服务端会智能断句,当识别到一句话结束时会返回此消息。
*/
@Override
public void onSentenceEnd(SpeechTranscriberResponse response) {
logger.warn("服务端检测到了一句话的结束");
logger.warn("task_id: " + response.getTaskId() +
", name: " + response.getName() +
//状态码“20000000”表示正常识别。
", status: " + response.getStatus() +
//句子编号,从1开始递增。
", index: " + response.getTransSentenceIndex() +
//当前的识别结果。
", result: " + response.getTransSentenceText() +
//置信度
", confidence: " + response.getConfidence() +
//开始时间
", begin_time: " + response.getSentenceBeginTime() +
//当前已处理的音频时长,单位为毫秒。
", time: " + response.getTransSentenceTime());
}
/**
* 识别结束后返回的最终结果
*/
@Override
public void onTranscriptionComplete(SpeechTranscriberResponse response) {
logger.warn("识别结束后返回的最终结果");
logger.warn("task_id: " + response.getTaskId()
+ ", name: " + response.getName()
+ ", status: " + response.getStatus()
+ ",result:" + response.getTransSentenceText());
}
/**
* 失败处理
*/
@Override
public void onFail(SpeechTranscriberResponse response) {
logger.error("失败处理");
logger.error("task_id: " + response.getTaskId()
+ ", status: " + response.getStatus()
+ ", status_text: " + response.getStatusText());
}
});
transcriber.process();
}
/**
* 语音文件识别极速版使用示例
*/
@Test
void testSpeechFlashRecognizer() {
//String fileName = SpeechRecognizerRestfulDemo.class.getClassLoader().getResource("./nls-sample-16k.wav").getPath();
// 重要:此处用一个本地文件来模拟发送实时流数据,实际使用时,您可以从某处实时采集或接收语音流并发送到ASR服务端。
String fileName = "D:\\ffmpeg-4.4-full_build-shared\\bin\\test.mp3";
String format = "mp3";
int sampleRate = 16000;
SpeechFlashRecognizer speechFlashRecognizer = new SpeechFlashRecognizer(new File(fileName), sampleRate, new Callback() {
@Override
public void onFailure(Call call, IOException e) {
logger.error("语音识别失败,原因:" + e.getMessage());
}
@Override
public void onResponse(Call call, Response response) throws IOException {
assert response.body() != null;
logger.warn("语音识别结果:" + response.body().string());
}
});
speechFlashRecognizer.process();
}
/**
* 语音文件识别普通版使用示例
*/
@Test
void testSpeechRecognizer() {
String fileLink = "https://gw.alipayobjects.com/os/bmw-prod/0574ee2e-f494-45a5-820f-63aee583045a.wav";
SpeechRecognizer speechRecognizer = new SpeechRecognizer();
// 第一步:提交录音文件识别请求,获取任务ID用于后续的识别结果轮询。
String taskId = speechRecognizer.submitFileTransRequest(fileLink);
if (taskId != null) {
logger.info("录音文件识别请求成功,task_id: " + taskId);
} else {
logger.error("录音文件识别请求失败!");
return;
}
// 第二步:根据任务ID轮询识别结果。
String result = speechRecognizer.getFileTransResult(taskId);
if (result != null) {
logger.info("录音文件识别结果查询成功:" + result);
} else {
logger.error("录音文件识别结果查询失败!");
}
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment