添加阿里云音频识别demo

f192af07 · helinlin · e139997b · f192af07 · f192af07 · f192af07
Commit f192af07 authored Nov 30, 2021 by helinlin
4 changed files
--- a/amos-boot-utils/amos-boot-utils-speech/src/main/java/com/yeejoin/amos/speech/util/HttpUtil.java
+++ b/amos-boot-utils/amos-boot-utils-speech/src/main/java/com/yeejoin/amos/speech/util/HttpUtil.java
@@ -3,8 +3,6 @@ package com.yeejoin.amos.speech.util;
 import okhttp3.*;
 import java.io.File;
-import java.io.IOException;
-import java.net.SocketTimeoutException;
 import java.util.HashMap;
 import java.util.Map;
 import java.util.concurrent.TimeUnit;
@@ -12,16 +10,24 @@ import java.util.concurrent.TimeUnit;
 /**
 * okHttp客户端
 */
-public class HttpUtil {
+public class AppOkHttpClient {
    static OkHttpClient client = new OkHttpClient
            .Builder()
            .connectTimeout(20, TimeUnit.SECONDS)
-            .readTimeout(60, TimeUnit.SECONDS)
+            .readTimeout(120, TimeUnit.SECONDS)
-            .writeTimeout(60, TimeUnit.SECONDS)
+            .writeTimeout(120, TimeUnit.SECONDS)
            .build();
-    public static String sendPostFile(String url, HashMap<String, String> headers, File speechFile) {
+    /**
+     * okHttp post提交文件请求
+     *
+     * @param url        请求地址
+     * @param headers    请求头
+     * @param speechFile 待识别文件
+     * @param callback   识别回调函数
+     */
+    public static void sendPostFile(String url, HashMap<String, String> headers, File speechFile, Callback callback) {
        RequestBody body = RequestBody.create(MediaType.parse("application/octet-stream"), speechFile);
        Headers.Builder hb = new Headers.Builder();
        if (headers != null && !headers.isEmpty()) {
@@ -34,21 +40,6 @@ public class HttpUtil {
                .headers(hb.build())
                .post(body)
                .build();
-        return getResponseWithTimeout(request);
+        client.newCall(request).enqueue(callback);
-    }
-    private static String getResponseWithTimeout(Request request) {
-        String result = null;
-        try {
-            Response s = client.newCall(request).execute();
-            assert s.body() != null;
-            result = s.body().string();
-            s.close();
-        } catch (SocketTimeoutException e) {
-            System.err.println("get result timeout");
-        } catch (IOException e) {
-            System.err.println("get result error " + e.getMessage());
-        }
-        return result;
    }
 }
\ No newline at end of file
--- a/amos-boot-utils/amos-boot-utils-speech/src/main/java/com/yeejoin/amos/speech/util/SpeechFlashRecognizerDemo.java
+++ b/amos-boot-utils/amos-boot-utils-speech/src/main/java/com/yeejoin/amos/speech/util/SpeechFlashRecognizerDemo.java
 package com.yeejoin.amos.speech.util;
+import okhttp3.Call;
+import okhttp3.Callback;
+import okhttp3.OkHttpClient;
+import okhttp3.Response;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 import java.io.File;
 import java.io.IOException;
 import java.util.HashMap;
 /**
- * 录音文件识别极速版（提交音频文件识别，文件大小，文件时长有限制）
+ * 录音文件识别极速版（支持提交音频文件识别，文件大小，文件时长有限制）
 * <p>
 * 音频格式：支持AAC/MP3/OPUS/WAV格式编码的音频。
 * 使用限制：支持100 MB以内且不超过2小时的音频文件的识别。
 *
- * @see SpeechRecognizerDemo
+ * @see SpeechRecognizer
 * 时长超过2小时的文件请使用录音文件识别普通版
 *
 * <p>
 * 模型类型：8000（电话）/16000（非电话）。
+ * <p>
+ * 服务端在HTTP响应中同步返回识别结果，本类通过回调函数异步接收该结果
 */
-public class SpeechFlashRecognizerDemo {
+public class SpeechFlashRecognizer {
+    private static final Logger logger = LoggerFactory.getLogger(SpeechFlashRecognizer.class);
+    //音频文件
    private final File speechFile;
+    //音频格式
+    private final String format;
+    //音频采样率
+    private final int sampleRate;
+    //识别回调函数
+    private final Callback callback;
    /**
-     * 不支持试用版
+     * 使用示例
+     * 不支持试用，需要付费
+     * 不支持试用，需要付费
+     * 不支持试用，需要付费
     */
    public static void main(String[] args) {
        //String fileName = SpeechRecognizerRestfulDemo.class.getClassLoader().getResource("./nls-sample-16k.wav").getPath();
@@ -28,12 +48,35 @@ public class SpeechFlashRecognizerDemo {
        String fileName = "D:\\ffmpeg-4.4-full_build-shared\\bin\\test.mp3";
        String format = "mp3";
        int sampleRate = 16000;
-        SpeechFlashRecognizerDemo demo = new SpeechFlashRecognizerDemo(new File(fileName));
+        SpeechFlashRecognizer speechFlashRecognizer = new SpeechFlashRecognizer(new File(fileName), format, sampleRate, new Callback() {
-        demo.process(AppNslClientToken.instance().getToken(), format, sampleRate);
+            @Override
+            public void onFailure(Call call, IOException e) {
+                logger.error("语音识别失败，原因：" + e.getMessage());
+            }
+            @Override
+            public void onResponse(Call call, Response response) throws IOException {
+                assert response.body() != null;
+                logger.warn("语音识别结果：" + response.body().string());
+            }
+        });
+        speechFlashRecognizer.process();
    }
-    public SpeechFlashRecognizerDemo(File speechFile) {
+    /**
+     * Creates a recognizer for one local audio file.
+     *
+     * @param speechFile audio file to submit; must be an existing regular file
+     * @param format     audio format; must not be null or empty
+     * @param sampleRate audio sample rate; must not be 0
+     * @param callback   callback stored for use when the request is sent
+     * @throws IllegalArgumentException if the file, format or sample rate is invalid
+     */
+    public SpeechFlashRecognizer(File speechFile, String format, int sampleRate, Callback callback) {
+        // Reject null, missing, or non-regular files; only an existing regular file may pass.
+        if (speechFile == null || !speechFile.exists() || !speechFile.isFile()) {
+            throw new IllegalArgumentException("待识别的文件存在异常");
+        }
+        if (null == format || "".equals(format)) {
+            throw new IllegalArgumentException("音频文件格式不能为空");
+        }
+        if (sampleRate == 0) {
+            throw new IllegalArgumentException("音频采样率不能为0");
+        }
        this.speechFile = speechFile;
+        this.format = format;
+        this.sampleRate = sampleRate;
+        this.callback = callback;
    }
    /**
@@ -42,32 +85,17 @@ public class SpeechFlashRecognizerDemo {
     * 2.语音识别服务域名：nls-gateway.cn-shanghai.aliyuncs.com
     * 3.语音识别接口请求路径：/stream/v1/FlashRecognizer
     * 4.设置必须请求参数：appkey、token、format、sample_rate
+     * 5.Content-Type 类型必须为 application/octet-stream
     */
-    public void process(String token, String format, int sampleRate) {
+    public void process() {
        String request = "https://nls-gateway.cn-shanghai.aliyuncs.com/stream/v1/FlashRecognizer";
        request = request + "?appkey=" + SpeechConfig.AppKey;
-        request = request + "&token=" + token;
+        request = request + "&token=" + AppNslClientToken.instance().getToken();
        request = request + "&format=" + format;
        request = request + "&sample_rate=" + sampleRate;
+        logger.info("Request: " + request);
-        System.out.println("Request: " + request);
-        /**
-         * 设置HTTPS头部字段
-         *
-         * 1.Content-Type：application/octet-stream
-         */
        HashMap<String, String> headers = new HashMap<>();
        headers.put("Content-Type", "application/octet-stream");
-        /**
+        AppOkHttpClient.sendPostFile(request, headers, this.speechFile, this.callback);
-         * 发送HTTPS POST请求，返回服务端的响应。
-         */
-        long start = System.currentTimeMillis();
-        String response = HttpUtil.sendPostFile(request, headers, this.speechFile);
-        System.out.println("latency = " + (System.currentTimeMillis() - start) + " ms");
-        if (response != null) {
-            System.out.println("Response: " + response);
-        } else {
-            System.err.println("识别失败!");
-        }
    }
 }
\ No newline at end of file
--- a/amos-boot-utils/amos-boot-utils-speech/src/main/java/com/yeejoin/amos/speech/util/SpeechRecognizerDemo.java
+++ b/amos-boot-utils/amos-boot-utils-speech/src/main/java/com/yeejoin/amos/speech/util/SpeechRecognizerDemo.java
@@ -19,7 +19,7 @@ import java.util.concurrent.TimeUnit;
 * 识别的文件需要提交基于HTTP可访问的URL地址，可以通过URL访问，不支持提交本地文件
 * 上传的录音文件URL的访问权限需要设置为公开，URL中只能使用域名不能使用IP地址、不可包含空格
 *
- * @see SpeechFlashRecognizerDemo
+ * @see SpeechFlashRecognizer
 * （内网考虑使用录音文件识别极速版，极速版可以提交文件识别，但极速版不提供试用）
 * <p>
 * 支持单轨/双轨的.wav、.mp3、.m4a、.wma、.aac、.ogg、.amr、.flac格式录音文件识别
@@ -27,7 +27,8 @@ import java.util.concurrent.TimeUnit;
 * 免费用户每日可识别不超过2小时时长的录音文件
 * 提交录音文件识别请求后，免费用户的识别任务在24小时内完成并返回识别文本； 付费用户的识别任务在6小时内完成并返回识别文本。识别结果在服务端可保存72小时
 */
-public class SpeechRecognizerDemo {
+@Deprecated
+public class SpeechRecognizer {
    // 地域ID，常量，固定值。
    public static final String REGIONID = "cn-shanghai";
    public static final String ENDPOINTNAME = "cn-shanghai";
@@ -56,23 +57,21 @@ public class SpeechRecognizerDemo {
    // 阿里云鉴权client
    IAcsClient client;
-    private static final Logger logger = LoggerFactory.getLogger(SpeechRecognizerDemo.class);
+    private static final Logger logger = LoggerFactory.getLogger(SpeechRecognizer.class);
    public static void main(String[] args) throws Exception {
        String fileLink = "https://gw.alipayobjects.com/os/bmw-prod/0574ee2e-f494-45a5-820f-63aee583045a.wav";
-        SpeechRecognizerDemo demo = new SpeechRecognizerDemo();
+        SpeechRecognizer speechRecognizer = new SpeechRecognizer();
        // 第一步：提交录音文件识别请求，获取任务ID用于后续的识别结果轮询。
-        String taskId = demo.submitFileTransRequest(fileLink);
+        String taskId = speechRecognizer.submitFileTransRequest(fileLink);
        if (taskId != null) {
            logger.info("录音文件识别请求成功，task_id: " + taskId);
        } else {
            logger.error("录音文件识别请求失败！");
            return;
        }
        // 第二步：根据任务ID轮询识别结果。
-        String result = demo.getFileTransResult(taskId);
+        String result = speechRecognizer.getFileTransResult(taskId);
        if (result != null) {
            logger.info("录音文件识别结果查询成功：" + result);
        } else {
@@ -80,7 +79,7 @@ public class SpeechRecognizerDemo {
        }
    }
-    public SpeechRecognizerDemo() {
+    public SpeechRecognizer() {
        // 设置endpoint
        try {
            DefaultProfile.addEndpoint(ENDPOINTNAME, REGIONID, PRODUCT, DOMAIN);

--- a/amos-boot-utils/amos-boot-utils-speech/src/main/java/com/yeejoin/amos/speech/util/SpeechTranscriberDemo.java
+++ b/amos-boot-utils/amos-boot-utils-speech/src/main/java/com/yeejoin/amos/speech/util/SpeechTranscriberDemo.java
@@ -2,7 +2,6 @@ package com.yeejoin.amos.speech.util;
 import com.alibaba.nls.client.protocol.InputFormatEnum;
 import com.alibaba.nls.client.protocol.SampleRateEnum;
-import com.alibaba.nls.client.protocol.asr.SpeechTranscriber;
 import com.alibaba.nls.client.protocol.asr.SpeechTranscriberListener;
 import com.alibaba.nls.client.protocol.asr.SpeechTranscriberResponse;
 import org.slf4j.Logger;
@@ -10,7 +9,6 @@ import org.slf4j.LoggerFactory;
 import java.io.File;
 import java.io.FileInputStream;
-import java.io.IOException;
 import java.util.concurrent.TimeUnit;
 import static com.yeejoin.amos.speech.util.SpeechConfig.AppKey;
@@ -22,22 +20,24 @@ import static com.yeejoin.amos.speech.util.SpeechConfig.AppKey;
 * 支持的输入格式：PCM（无压缩的PCM或WAV文件）、16 bit采样位数、单声道（mono）。
 * 支持的音频采样率：8000 Hz和16000 Hz。
 */
-public class SpeechTranscriberDemo {
+public class SpeechTranscriber {
-    private static final Logger logger = LoggerFactory.getLogger(SpeechTranscriberDemo.class);
+    private static final Logger logger = LoggerFactory.getLogger(SpeechTranscriber.class);
+    //语音识别文件
    private final File speechFile;
+    //识别回调函数
    private final SpeechTranscriberListener listener;
    /**
     * 使用示例
-     *
+     * 提供音频流地址
     */
    public static void main(String[] args) {
        //本案例使用本地文件模拟发送实时流数据。您在实际使用时，可以实时采集或接收语音流并发送到ASR服务端。
        String fileLink = "https://gw.alipayobjects.com/os/bmw-prod/0574ee2e-f494-45a5-820f-63aee583045a.wav";
        //将上面fileLink文件下载到本地后，替换filepath为本地地址测试
        String filepath = "D:\\ffmpeg-4.4-full_build-shared\\bin\\test1.wav";
-        SpeechTranscriberDemo transcriberDemo = new SpeechTranscriberDemo(new File(filepath), new SpeechTranscriberListener() {
+        SpeechTranscriber transcriber = new SpeechTranscriber(new File(filepath), new SpeechTranscriberListener() {
            /**
             * 语音识别过程中返回的结果。仅当setEnableIntermediateResult为true时，才会返回该消息。
             */
@@ -125,11 +125,11 @@ public class SpeechTranscriberDemo {
                        + ", status_text: " + response.getStatusText());
            }
        });
-        transcriberDemo.process();
+        transcriber.process();
    }
-    public SpeechTranscriberDemo(File speechFile, SpeechTranscriberListener listener) {
+    public SpeechTranscriber(File speechFile, SpeechTranscriberListener listener) {
        if (speechFile != null && speechFile.exists() && speechFile.isFile()) {
            this.speechFile = speechFile;
            this.listener = listener;
@@ -142,10 +142,10 @@ public class SpeechTranscriberDemo {
     * 开始语音识别
     */
    public void process() {
-        SpeechTranscriber transcriber = null;
+        com.alibaba.nls.client.protocol.asr.SpeechTranscriber transcriber = null;
        try {
            //创建实例、建立连接。
-            transcriber = new SpeechTranscriber(AppNslClient.instance(), listener);
+            transcriber = new com.alibaba.nls.client.protocol.asr.SpeechTranscriber(AppNslClient.instance(), listener);
            transcriber.setAppKey(AppKey);
            //输入音频编码方式。
            transcriber.setFormat(InputFormatEnum.PCM);