Commit f192af07 authored by helinlin

添加阿里云音频识别demo

parent e139997b
...@@ -3,8 +3,6 @@ package com.yeejoin.amos.speech.util; ...@@ -3,8 +3,6 @@ package com.yeejoin.amos.speech.util;
import okhttp3.*; import okhttp3.*;
import java.io.File; import java.io.File;
import java.io.IOException;
import java.net.SocketTimeoutException;
import java.util.HashMap; import java.util.HashMap;
import java.util.Map; import java.util.Map;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
...@@ -12,16 +10,24 @@ import java.util.concurrent.TimeUnit; ...@@ -12,16 +10,24 @@ import java.util.concurrent.TimeUnit;
/** /**
* okHttp客户端 * okHttp客户端
*/ */
public class HttpUtil { public class AppOkHttpClient {
static OkHttpClient client = new OkHttpClient static OkHttpClient client = new OkHttpClient
.Builder() .Builder()
.connectTimeout(20, TimeUnit.SECONDS) .connectTimeout(20, TimeUnit.SECONDS)
.readTimeout(60, TimeUnit.SECONDS) .readTimeout(120, TimeUnit.SECONDS)
.writeTimeout(60, TimeUnit.SECONDS) .writeTimeout(120, TimeUnit.SECONDS)
.build(); .build();
public static String sendPostFile(String url, HashMap<String, String> headers, File speechFile) { /**
* okHttp post提交文件请求
*
* @param url 请求地址
* @param headers 请求头
* @param speechFile 待识别文件
* @param callback 识别回调函数
*/
public static void sendPostFile(String url, HashMap<String, String> headers, File speechFile, Callback callback) {
RequestBody body = RequestBody.create(MediaType.parse("application/octet-stream"), speechFile); RequestBody body = RequestBody.create(MediaType.parse("application/octet-stream"), speechFile);
Headers.Builder hb = new Headers.Builder(); Headers.Builder hb = new Headers.Builder();
if (headers != null && !headers.isEmpty()) { if (headers != null && !headers.isEmpty()) {
...@@ -34,21 +40,6 @@ public class HttpUtil { ...@@ -34,21 +40,6 @@ public class HttpUtil {
.headers(hb.build()) .headers(hb.build())
.post(body) .post(body)
.build(); .build();
return getResponseWithTimeout(request); client.newCall(request).enqueue(callback);
}
private static String getResponseWithTimeout(Request request) {
String result = null;
try {
Response s = client.newCall(request).execute();
assert s.body() != null;
result = s.body().string();
s.close();
} catch (SocketTimeoutException e) {
System.err.println("get result timeout");
} catch (IOException e) {
System.err.println("get result error " + e.getMessage());
}
return result;
} }
} }
\ No newline at end of file
package com.yeejoin.amos.speech.util; package com.yeejoin.amos.speech.util;
import okhttp3.Call;
import okhttp3.Callback;
import okhttp3.OkHttpClient;
import okhttp3.Response;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File; import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.util.HashMap; import java.util.HashMap;
/** /**
* 录音文件识别极速版(提交音频文件识别,文件大小,文件时长有限制) * 录音文件识别极速版(支持提交音频文件识别,文件大小,文件时长有限制)
* <p> * <p>
* 音频格式:支持AAC/MP3/OPUS/WAV格式编码的音频。 * 音频格式:支持AAC/MP3/OPUS/WAV格式编码的音频。
* 使用限制:支持100 MB以内且不超过2小时的音频文件的识别。 * 使用限制:支持100 MB以内且不超过2小时的音频文件的识别。
* *
* @see SpeechRecognizerDemo * @see SpeechRecognizer
* 时长超过2小时的文件请使用录音文件识别普通版 * 时长超过2小时的文件请使用录音文件识别普通版
* *
* <p> * <p>
* 模型类型:8000(电话)/16000(非电话)。 * 模型类型:8000(电话)/16000(非电话)。
* <p>
* 服务端在同一次HTTP响应中返回识别结果,本类通过okHttp回调异步接收该结果
*/ */
public class SpeechFlashRecognizerDemo { public class SpeechFlashRecognizer {
private static final Logger logger = LoggerFactory.getLogger(SpeechFlashRecognizer.class);
//音频文件
private final File speechFile; private final File speechFile;
//音频格式
private final String format;
//音频采样率
private final int sampleRate;
//识别回调函数
private final Callback callback;
/** /**
* 不支持试用版 * 使用示例
* 不支持试用,需要付费
* 不支持试用,需要付费
* 不支持试用,需要付费
*/ */
public static void main(String[] args) { public static void main(String[] args) {
//String fileName = SpeechRecognizerRestfulDemo.class.getClassLoader().getResource("./nls-sample-16k.wav").getPath(); //String fileName = SpeechRecognizerRestfulDemo.class.getClassLoader().getResource("./nls-sample-16k.wav").getPath();
...@@ -28,12 +48,35 @@ public class SpeechFlashRecognizerDemo { ...@@ -28,12 +48,35 @@ public class SpeechFlashRecognizerDemo {
String fileName = "D:\\ffmpeg-4.4-full_build-shared\\bin\\test.mp3"; String fileName = "D:\\ffmpeg-4.4-full_build-shared\\bin\\test.mp3";
String format = "mp3"; String format = "mp3";
int sampleRate = 16000; int sampleRate = 16000;
SpeechFlashRecognizerDemo demo = new SpeechFlashRecognizerDemo(new File(fileName)); SpeechFlashRecognizer speechFlashRecognizer = new SpeechFlashRecognizer(new File(fileName), format, sampleRate, new Callback() {
demo.process(AppNslClientToken.instance().getToken(), format, sampleRate); @Override
public void onFailure(Call call, IOException e) {
logger.error("语音识别失败,原因:" + e.getMessage());
}
@Override
public void onResponse(Call call, Response response) throws IOException {
assert response.body() != null;
logger.warn("语音识别结果:" + response.body().string());
}
});
speechFlashRecognizer.process();
} }
/**
 * Creates a recognizer for a single local audio file.
 *
 * @param speechFile audio file to submit; must be non-null, exist and be a regular file
 * @param format     audio format passed as the {@code format} request parameter; must not be null or empty
 * @param sampleRate audio sample rate passed as the {@code sample_rate} request parameter; must not be 0
 * @param callback   OkHttp callback that receives the HTTP response or the failure
 * @throws IllegalArgumentException if the file is missing or not a regular file,
 *                                  the format is null or empty, or the sample rate is 0
 */
public SpeechFlashRecognizer(File speechFile, String format, int sampleRate, Callback callback) {
    // Fix: the previous condition was inverted, so it rejected every valid file
    // and accepted null / missing files.
    if (speechFile == null || !speechFile.exists() || !speechFile.isFile()) {
        throw new IllegalArgumentException("待识别的文件存在异常");
    }
    if (format == null || format.isEmpty()) {
        throw new IllegalArgumentException("音频文件格式不能为空");
    }
    if (sampleRate == 0) {
        throw new IllegalArgumentException("音频采样率不能为0");
    }
    this.speechFile = speechFile;
    this.format = format;
    this.sampleRate = sampleRate;
    this.callback = callback;
}
/** /**
...@@ -42,32 +85,17 @@ public class SpeechFlashRecognizerDemo { ...@@ -42,32 +85,17 @@ public class SpeechFlashRecognizerDemo {
* 2.语音识别服务域名:nls-gateway.cn-shanghai.aliyuncs.com * 2.语音识别服务域名:nls-gateway.cn-shanghai.aliyuncs.com
* 3.语音识别接口请求路径:/stream/v1/FlashRecognizer * 3.语音识别接口请求路径:/stream/v1/FlashRecognizer
* 4.设置必须请求参数:appkey、token、format、sample_rate * 4.设置必须请求参数:appkey、token、format、sample_rate
* 5.Content-Type 类型必须为 application/octet-stream
*/ */
public void process(String token, String format, int sampleRate) { public void process() {
String request = "https://nls-gateway.cn-shanghai.aliyuncs.com/stream/v1/FlashRecognizer"; String request = "https://nls-gateway.cn-shanghai.aliyuncs.com/stream/v1/FlashRecognizer";
request = request + "?appkey=" + SpeechConfig.AppKey; request = request + "?appkey=" + SpeechConfig.AppKey;
request = request + "&token=" + token; request = request + "&token=" + AppNslClientToken.instance().getToken();
request = request + "&format=" + format; request = request + "&format=" + format;
request = request + "&sample_rate=" + sampleRate; request = request + "&sample_rate=" + sampleRate;
logger.info("Request: " + request);
System.out.println("Request: " + request);
/**
* 设置HTTPS头部字段
*
* 1.Content-Type:application/octet-stream
*/
HashMap<String, String> headers = new HashMap<>(); HashMap<String, String> headers = new HashMap<>();
headers.put("Content-Type", "application/octet-stream"); headers.put("Content-Type", "application/octet-stream");
/** AppOkHttpClient.sendPostFile(request, headers, this.speechFile, this.callback);
* 发送HTTPS POST请求,返回服务端的响应。
*/
long start = System.currentTimeMillis();
String response = HttpUtil.sendPostFile(request, headers, this.speechFile);
System.out.println("latency = " + (System.currentTimeMillis() - start) + " ms");
if (response != null) {
System.out.println("Response: " + response);
} else {
System.err.println("识别失败!");
}
} }
} }
\ No newline at end of file
...@@ -19,7 +19,7 @@ import java.util.concurrent.TimeUnit; ...@@ -19,7 +19,7 @@ import java.util.concurrent.TimeUnit;
* 识别的文件需要提交基于HTTP可访问的URL地址,可以通过URL访问,不支持提交本地文件 * 识别的文件需要提交基于HTTP可访问的URL地址,可以通过URL访问,不支持提交本地文件
* 上传的录音文件URL的访问权限需要设置为公开,URL中只能使用域名不能使用IP地址、不可包含空格 * 上传的录音文件URL的访问权限需要设置为公开,URL中只能使用域名不能使用IP地址、不可包含空格
* *
* @see SpeechFlashRecognizerDemo * @see SpeechFlashRecognizer
* (内网考虑使用录音文件识别极速版,极速版可以提交文件识别,但极速版不提供试用) * (内网考虑使用录音文件识别极速版,极速版可以提交文件识别,但极速版不提供试用)
* <p> * <p>
* 支持单轨/双轨的.wav、.mp3、.m4a、.wma、.aac、.ogg、.amr、.flac格式录音文件识别 * 支持单轨/双轨的.wav、.mp3、.m4a、.wma、.aac、.ogg、.amr、.flac格式录音文件识别
...@@ -27,7 +27,8 @@ import java.util.concurrent.TimeUnit; ...@@ -27,7 +27,8 @@ import java.util.concurrent.TimeUnit;
* 免费用户每日可识别不超过2小时时长的录音文件 * 免费用户每日可识别不超过2小时时长的录音文件
* 提交录音文件识别请求后,免费用户的识别任务在24小时内完成并返回识别文本; 付费用户的识别任务在6小时内完成并返回识别文本。识别结果在服务端可保存72小时 * 提交录音文件识别请求后,免费用户的识别任务在24小时内完成并返回识别文本; 付费用户的识别任务在6小时内完成并返回识别文本。识别结果在服务端可保存72小时
*/ */
public class SpeechRecognizerDemo { @Deprecated
public class SpeechRecognizer {
// 地域ID,常量,固定值。 // 地域ID,常量,固定值。
public static final String REGIONID = "cn-shanghai"; public static final String REGIONID = "cn-shanghai";
public static final String ENDPOINTNAME = "cn-shanghai"; public static final String ENDPOINTNAME = "cn-shanghai";
...@@ -56,23 +57,21 @@ public class SpeechRecognizerDemo { ...@@ -56,23 +57,21 @@ public class SpeechRecognizerDemo {
// 阿里云鉴权client // 阿里云鉴权client
IAcsClient client; IAcsClient client;
private static final Logger logger = LoggerFactory.getLogger(SpeechRecognizerDemo.class); private static final Logger logger = LoggerFactory.getLogger(SpeechRecognizer.class);
public static void main(String[] args) throws Exception { public static void main(String[] args) throws Exception {
String fileLink = "https://gw.alipayobjects.com/os/bmw-prod/0574ee2e-f494-45a5-820f-63aee583045a.wav"; String fileLink = "https://gw.alipayobjects.com/os/bmw-prod/0574ee2e-f494-45a5-820f-63aee583045a.wav";
SpeechRecognizerDemo demo = new SpeechRecognizerDemo(); SpeechRecognizer speechRecognizer = new SpeechRecognizer();
// 第一步:提交录音文件识别请求,获取任务ID用于后续的识别结果轮询。 // 第一步:提交录音文件识别请求,获取任务ID用于后续的识别结果轮询。
String taskId = demo.submitFileTransRequest(fileLink); String taskId = speechRecognizer.submitFileTransRequest(fileLink);
if (taskId != null) { if (taskId != null) {
logger.info("录音文件识别请求成功,task_id: " + taskId); logger.info("录音文件识别请求成功,task_id: " + taskId);
} else { } else {
logger.error("录音文件识别请求失败!"); logger.error("录音文件识别请求失败!");
return; return;
} }
// 第二步:根据任务ID轮询识别结果。 // 第二步:根据任务ID轮询识别结果。
String result = demo.getFileTransResult(taskId); String result = speechRecognizer.getFileTransResult(taskId);
if (result != null) { if (result != null) {
logger.info("录音文件识别结果查询成功:" + result); logger.info("录音文件识别结果查询成功:" + result);
} else { } else {
...@@ -80,7 +79,7 @@ public class SpeechRecognizerDemo { ...@@ -80,7 +79,7 @@ public class SpeechRecognizerDemo {
} }
} }
public SpeechRecognizerDemo() { public SpeechRecognizer() {
// 设置endpoint // 设置endpoint
try { try {
DefaultProfile.addEndpoint(ENDPOINTNAME, REGIONID, PRODUCT, DOMAIN); DefaultProfile.addEndpoint(ENDPOINTNAME, REGIONID, PRODUCT, DOMAIN);
......
...@@ -2,7 +2,6 @@ package com.yeejoin.amos.speech.util; ...@@ -2,7 +2,6 @@ package com.yeejoin.amos.speech.util;
import com.alibaba.nls.client.protocol.InputFormatEnum; import com.alibaba.nls.client.protocol.InputFormatEnum;
import com.alibaba.nls.client.protocol.SampleRateEnum; import com.alibaba.nls.client.protocol.SampleRateEnum;
import com.alibaba.nls.client.protocol.asr.SpeechTranscriber;
import com.alibaba.nls.client.protocol.asr.SpeechTranscriberListener; import com.alibaba.nls.client.protocol.asr.SpeechTranscriberListener;
import com.alibaba.nls.client.protocol.asr.SpeechTranscriberResponse; import com.alibaba.nls.client.protocol.asr.SpeechTranscriberResponse;
import org.slf4j.Logger; import org.slf4j.Logger;
...@@ -10,7 +9,6 @@ import org.slf4j.LoggerFactory; ...@@ -10,7 +9,6 @@ import org.slf4j.LoggerFactory;
import java.io.File; import java.io.File;
import java.io.FileInputStream; import java.io.FileInputStream;
import java.io.IOException;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
import static com.yeejoin.amos.speech.util.SpeechConfig.AppKey; import static com.yeejoin.amos.speech.util.SpeechConfig.AppKey;
...@@ -22,22 +20,24 @@ import static com.yeejoin.amos.speech.util.SpeechConfig.AppKey; ...@@ -22,22 +20,24 @@ import static com.yeejoin.amos.speech.util.SpeechConfig.AppKey;
* 支持的输入格式:PCM(无压缩的PCM或WAV文件)、16 bit采样位数、单声道(mono)。 * 支持的输入格式:PCM(无压缩的PCM或WAV文件)、16 bit采样位数、单声道(mono)。
* 支持的音频采样率:8000 Hz和16000 Hz。 * 支持的音频采样率:8000 Hz和16000 Hz。
*/ */
public class SpeechTranscriberDemo { public class SpeechTranscriber {
private static final Logger logger = LoggerFactory.getLogger(SpeechTranscriberDemo.class); private static final Logger logger = LoggerFactory.getLogger(SpeechTranscriber.class);
//语音识别文件
private final File speechFile; private final File speechFile;
//识别回调函数
private final SpeechTranscriberListener listener; private final SpeechTranscriberListener listener;
/** /**
* 使用示例 * 使用示例
* * 提供音频流地址
*/ */
public static void main(String[] args) { public static void main(String[] args) {
//本案例使用本地文件模拟发送实时流数据。您在实际使用时,可以实时采集或接收语音流并发送到ASR服务端。 //本案例使用本地文件模拟发送实时流数据。您在实际使用时,可以实时采集或接收语音流并发送到ASR服务端。
String fileLink = "https://gw.alipayobjects.com/os/bmw-prod/0574ee2e-f494-45a5-820f-63aee583045a.wav"; String fileLink = "https://gw.alipayobjects.com/os/bmw-prod/0574ee2e-f494-45a5-820f-63aee583045a.wav";
//将上面fileLink文件下载到本地后,替换filepath为本地地址测试 //将上面fileLink文件下载到本地后,替换filepath为本地地址测试
String filepath = "D:\\ffmpeg-4.4-full_build-shared\\bin\\test1.wav"; String filepath = "D:\\ffmpeg-4.4-full_build-shared\\bin\\test1.wav";
SpeechTranscriberDemo transcriberDemo = new SpeechTranscriberDemo(new File(filepath), new SpeechTranscriberListener() { SpeechTranscriber transcriber = new SpeechTranscriber(new File(filepath), new SpeechTranscriberListener() {
/** /**
* 语音识别过程中返回的结果。仅当setEnableIntermediateResult为true时,才会返回该消息。 * 语音识别过程中返回的结果。仅当setEnableIntermediateResult为true时,才会返回该消息。
*/ */
...@@ -125,11 +125,11 @@ public class SpeechTranscriberDemo { ...@@ -125,11 +125,11 @@ public class SpeechTranscriberDemo {
+ ", status_text: " + response.getStatusText()); + ", status_text: " + response.getStatusText());
} }
}); });
transcriberDemo.process(); transcriber.process();
} }
public SpeechTranscriberDemo(File speechFile, SpeechTranscriberListener listener) { public SpeechTranscriber(File speechFile, SpeechTranscriberListener listener) {
if (speechFile != null && speechFile.exists() && speechFile.isFile()) { if (speechFile != null && speechFile.exists() && speechFile.isFile()) {
this.speechFile = speechFile; this.speechFile = speechFile;
this.listener = listener; this.listener = listener;
...@@ -142,10 +142,10 @@ public class SpeechTranscriberDemo { ...@@ -142,10 +142,10 @@ public class SpeechTranscriberDemo {
* 开始语音识别 * 开始语音识别
*/ */
public void process() { public void process() {
SpeechTranscriber transcriber = null; com.alibaba.nls.client.protocol.asr.SpeechTranscriber transcriber = null;
try { try {
//创建实例、建立连接。 //创建实例、建立连接。
transcriber = new SpeechTranscriber(AppNslClient.instance(), listener); transcriber = new com.alibaba.nls.client.protocol.asr.SpeechTranscriber(AppNslClient.instance(), listener);
transcriber.setAppKey(AppKey); transcriber.setAppKey(AppKey);
//输入音频编码方式。 //输入音频编码方式。
transcriber.setFormat(InputFormatEnum.PCM); transcriber.setFormat(InputFormatEnum.PCM);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment