Commit f192af07 authored by helinlin

添加阿里云音频识别demo

parent e139997b
......@@ -3,8 +3,6 @@ package com.yeejoin.amos.speech.util;
import okhttp3.*;
import java.io.File;
import java.io.IOException;
import java.net.SocketTimeoutException;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.TimeUnit;
......@@ -12,16 +10,24 @@ import java.util.concurrent.TimeUnit;
/**
* okHttp客户端
*/
public class HttpUtil {
public class AppOkHttpClient {
static OkHttpClient client = new OkHttpClient
.Builder()
.connectTimeout(20, TimeUnit.SECONDS)
.readTimeout(60, TimeUnit.SECONDS)
.writeTimeout(60, TimeUnit.SECONDS)
.readTimeout(120, TimeUnit.SECONDS)
.writeTimeout(120, TimeUnit.SECONDS)
.build();
public static String sendPostFile(String url, HashMap<String, String> headers, File speechFile) {
/**
* okHttp post提交文件请求
*
* @param url 请求地址
* @param headers 请求头
* @param speechFile 待识别文件
* @param callback 识别回调函数
*/
public static void sendPostFile(String url, HashMap<String, String> headers, File speechFile, Callback callback) {
RequestBody body = RequestBody.create(MediaType.parse("application/octet-stream"), speechFile);
Headers.Builder hb = new Headers.Builder();
if (headers != null && !headers.isEmpty()) {
......@@ -34,21 +40,6 @@ public class HttpUtil {
.headers(hb.build())
.post(body)
.build();
return getResponseWithTimeout(request);
}
private static String getResponseWithTimeout(Request request) {
String result = null;
try {
Response s = client.newCall(request).execute();
assert s.body() != null;
result = s.body().string();
s.close();
} catch (SocketTimeoutException e) {
System.err.println("get result timeout");
} catch (IOException e) {
System.err.println("get result error " + e.getMessage());
}
return result;
client.newCall(request).enqueue(callback);
}
}
\ No newline at end of file
package com.yeejoin.amos.speech.util;
import okhttp3.Call;
import okhttp3.Callback;
import okhttp3.OkHttpClient;
import okhttp3.Response;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.IOException;
import java.util.HashMap;
/**
* 录音文件识别极速版(提交音频文件识别,文件大小,文件时长有限制)
* 录音文件识别极速版(支持提交音频文件识别,文件大小,文件时长有限制)
* <p>
* 音频格式:支持AAC/MP3/OPUS/WAV格式编码的音频。
* 使用限制:支持100 MB以内且不超过2小时的音频文件的识别。
*
* @see SpeechRecognizerDemo
* @see SpeechRecognizer
* 时长超过2小时的文件请使用录音文件识别普通版
*
* <p>
* 模型类型:8000(电话)/16000(非电话)。
* <p>
* 同步返回识别结果
*/
public class SpeechFlashRecognizerDemo {
public class SpeechFlashRecognizer {
private static final Logger logger = LoggerFactory.getLogger(SpeechFlashRecognizer.class);
//音频文件
private final File speechFile;
//音频格式
private final String format;
//音频采样率
private final int sampleRate;
//识别回调函数
private final Callback callback;
/**
* 不支持试用版
* 使用示例
* 不支持试用,需要付费
* 不支持试用,需要付费
* 不支持试用,需要付费
*/
public static void main(String[] args) {
//String fileName = SpeechRecognizerRestfulDemo.class.getClassLoader().getResource("./nls-sample-16k.wav").getPath();
......@@ -28,12 +48,35 @@ public class SpeechFlashRecognizerDemo {
String fileName = "D:\\ffmpeg-4.4-full_build-shared\\bin\\test.mp3";
String format = "mp3";
int sampleRate = 16000;
SpeechFlashRecognizerDemo demo = new SpeechFlashRecognizerDemo(new File(fileName));
demo.process(AppNslClientToken.instance().getToken(), format, sampleRate);
SpeechFlashRecognizer speechFlashRecognizer = new SpeechFlashRecognizer(new File(fileName), format, sampleRate, new Callback() {
/**
 * Logs a failed recognition request.
 *
 * @param call the call that failed
 * @param e    the I/O error reported for the call
 */
@Override
public void onFailure(Call call, IOException e) {
    // Pass the exception as the last argument so the stack trace and cause
    // are logged too, not only the message text.
    logger.error("语音识别失败,原因:" + e.getMessage(), e);
}
/**
 * Logs the raw response body returned for the recognition request.
 *
 * @param call     the call that produced the response
 * @param response the HTTP response whose body is read as a string
 * @throws IOException if reading the response body fails
 */
@Override
public void onResponse(Call call, Response response) throws IOException {
    assert response.body() != null;
    // Read the whole body into a string first, then log it.
    final String recognitionResult = response.body().string();
    logger.warn("语音识别结果:" + recognitionResult);
}
});
speechFlashRecognizer.process();
}
public SpeechFlashRecognizerDemo(File speechFile) {
/**
 * Creates a recognizer for one local audio file.
 *
 * @param speechFile audio file to submit; must be non-null, exist and be a regular file
 * @param format     value sent as the {@code format} request parameter; must be non-empty
 * @param sampleRate value sent as the {@code sample_rate} request parameter; must not be 0
 * @param callback   callback handed to the HTTP client when the request is submitted
 * @throws IllegalArgumentException if the file is missing or not a regular file,
 *                                  the format is empty, or the sample rate is 0
 */
public SpeechFlashRecognizer(File speechFile, String format, int sampleRate, Callback callback) {
    // Reject the file when it is null, does not exist, or is not a regular file.
    // (The condition used to be inverted: it threw for valid files and let invalid ones through.)
    if (speechFile == null || !speechFile.exists() || !speechFile.isFile()) {
        throw new IllegalArgumentException("待识别的文件存在异常");
    }
    if (format == null || format.isEmpty()) {
        throw new IllegalArgumentException("音频文件格式不能为空");
    }
    if (sampleRate == 0) {
        throw new IllegalArgumentException("音频采样率不能为0");
    }
    this.speechFile = speechFile;
    this.format = format;
    this.sampleRate = sampleRate;
    this.callback = callback;
}
/**
......@@ -42,32 +85,17 @@ public class SpeechFlashRecognizerDemo {
* 2.语音识别服务域名:nls-gateway.cn-shanghai.aliyuncs.com
* 3.语音识别接口请求路径:/stream/v1/FlashRecognizer
* 4.设置必须请求参数:appkey、token、format、sample_rate
* 5.Content-Type 类型必须为 application/octet-stream
*/
public void process(String token, String format, int sampleRate) {
public void process() {
String request = "https://nls-gateway.cn-shanghai.aliyuncs.com/stream/v1/FlashRecognizer";
request = request + "?appkey=" + SpeechConfig.AppKey;
request = request + "&token=" + token;
request = request + "&token=" + AppNslClientToken.instance().getToken();
request = request + "&format=" + format;
request = request + "&sample_rate=" + sampleRate;
System.out.println("Request: " + request);
/**
* 设置HTTPS头部字段
*
* 1.Content-Type:application/octet-stream
*/
logger.info("Request: " + request);
HashMap<String, String> headers = new HashMap<>();
headers.put("Content-Type", "application/octet-stream");
/**
* 发送HTTPS POST请求,返回服务端的响应。
*/
long start = System.currentTimeMillis();
String response = HttpUtil.sendPostFile(request, headers, this.speechFile);
System.out.println("latency = " + (System.currentTimeMillis() - start) + " ms");
if (response != null) {
System.out.println("Response: " + response);
} else {
System.err.println("识别失败!");
}
AppOkHttpClient.sendPostFile(request, headers, this.speechFile, this.callback);
}
}
\ No newline at end of file
......@@ -19,7 +19,7 @@ import java.util.concurrent.TimeUnit;
* 识别的文件需要提交基于HTTP可访问的URL地址,可以通过URL访问,不支持提交本地文件
* 上传的录音文件URL的访问权限需要设置为公开,URL中只能使用域名不能使用IP地址、不可包含空格
*
* @see SpeechFlashRecognizerDemo
* @see SpeechFlashRecognizer
* (内网考虑使用录音文件识别极速版,极速版可以提交文件识别,但极速版不提供试用)
* <p>
* 支持单轨/双轨的.wav、.mp3、.m4a、.wma、.aac、.ogg、.amr、.flac格式录音文件识别
......@@ -27,7 +27,8 @@ import java.util.concurrent.TimeUnit;
* 免费用户每日可识别不超过2小时时长的录音文件
* 提交录音文件识别请求后,免费用户的识别任务在24小时内完成并返回识别文本; 付费用户的识别任务在6小时内完成并返回识别文本。识别结果在服务端可保存72小时
*/
public class SpeechRecognizerDemo {
@Deprecated
public class SpeechRecognizer {
// 地域ID,常量,固定值。
public static final String REGIONID = "cn-shanghai";
public static final String ENDPOINTNAME = "cn-shanghai";
......@@ -56,23 +57,21 @@ public class SpeechRecognizerDemo {
// 阿里云鉴权client
IAcsClient client;
private static final Logger logger = LoggerFactory.getLogger(SpeechRecognizerDemo.class);
private static final Logger logger = LoggerFactory.getLogger(SpeechRecognizer.class);
public static void main(String[] args) throws Exception {
String fileLink = "https://gw.alipayobjects.com/os/bmw-prod/0574ee2e-f494-45a5-820f-63aee583045a.wav";
SpeechRecognizerDemo demo = new SpeechRecognizerDemo();
SpeechRecognizer speechRecognizer = new SpeechRecognizer();
// 第一步:提交录音文件识别请求,获取任务ID用于后续的识别结果轮询。
String taskId = demo.submitFileTransRequest(fileLink);
String taskId = speechRecognizer.submitFileTransRequest(fileLink);
if (taskId != null) {
logger.info("录音文件识别请求成功,task_id: " + taskId);
} else {
logger.error("录音文件识别请求失败!");
return;
}
// 第二步:根据任务ID轮询识别结果。
String result = demo.getFileTransResult(taskId);
String result = speechRecognizer.getFileTransResult(taskId);
if (result != null) {
logger.info("录音文件识别结果查询成功:" + result);
} else {
......@@ -80,7 +79,7 @@ public class SpeechRecognizerDemo {
}
}
public SpeechRecognizerDemo() {
public SpeechRecognizer() {
// 设置endpoint
try {
DefaultProfile.addEndpoint(ENDPOINTNAME, REGIONID, PRODUCT, DOMAIN);
......
......@@ -2,7 +2,6 @@ package com.yeejoin.amos.speech.util;
import com.alibaba.nls.client.protocol.InputFormatEnum;
import com.alibaba.nls.client.protocol.SampleRateEnum;
import com.alibaba.nls.client.protocol.asr.SpeechTranscriber;
import com.alibaba.nls.client.protocol.asr.SpeechTranscriberListener;
import com.alibaba.nls.client.protocol.asr.SpeechTranscriberResponse;
import org.slf4j.Logger;
......@@ -10,7 +9,6 @@ import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.concurrent.TimeUnit;
import static com.yeejoin.amos.speech.util.SpeechConfig.AppKey;
......@@ -22,22 +20,24 @@ import static com.yeejoin.amos.speech.util.SpeechConfig.AppKey;
* 支持的输入格式:PCM(无压缩的PCM或WAV文件)、16 bit采样位数、单声道(mono)。
* 支持的音频采样率:8000 Hz和16000 Hz。
*/
public class SpeechTranscriberDemo {
public class SpeechTranscriber {
private static final Logger logger = LoggerFactory.getLogger(SpeechTranscriberDemo.class);
private static final Logger logger = LoggerFactory.getLogger(SpeechTranscriber.class);
//语音识别文件
private final File speechFile;
//识别回调函数
private final SpeechTranscriberListener listener;
/**
* 使用示例
*
* 提供音频流地址
*/
public static void main(String[] args) {
//本案例使用本地文件模拟发送实时流数据。您在实际使用时,可以实时采集或接收语音流并发送到ASR服务端。
String fileLink = "https://gw.alipayobjects.com/os/bmw-prod/0574ee2e-f494-45a5-820f-63aee583045a.wav";
//将上面fileLink文件下载到本地后,替换filepath为本地地址测试
String filepath = "D:\\ffmpeg-4.4-full_build-shared\\bin\\test1.wav";
SpeechTranscriberDemo transcriberDemo = new SpeechTranscriberDemo(new File(filepath), new SpeechTranscriberListener() {
SpeechTranscriber transcriber = new SpeechTranscriber(new File(filepath), new SpeechTranscriberListener() {
/**
* 语音识别过程中返回的结果。仅当setEnableIntermediateResult为true时,才会返回该消息。
*/
......@@ -125,11 +125,11 @@ public class SpeechTranscriberDemo {
+ ", status_text: " + response.getStatusText());
}
});
transcriberDemo.process();
transcriber.process();
}
public SpeechTranscriberDemo(File speechFile, SpeechTranscriberListener listener) {
public SpeechTranscriber(File speechFile, SpeechTranscriberListener listener) {
if (speechFile != null && speechFile.exists() && speechFile.isFile()) {
this.speechFile = speechFile;
this.listener = listener;
......@@ -142,10 +142,10 @@ public class SpeechTranscriberDemo {
* 开始语音识别
*/
public void process() {
SpeechTranscriber transcriber = null;
com.alibaba.nls.client.protocol.asr.SpeechTranscriber transcriber = null;
try {
//创建实例、建立连接。
transcriber = new SpeechTranscriber(AppNslClient.instance(), listener);
transcriber = new com.alibaba.nls.client.protocol.asr.SpeechTranscriber(AppNslClient.instance(), listener);
transcriber.setAppKey(AppKey);
//输入音频编码方式。
transcriber.setFormat(InputFormatEnum.PCM);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment