添加阿里云音频识别demo

54766f8a · helinlin · 74ea674a · 54766f8a · 54766f8a · 54766f8a
Commit 54766f8a authored Nov 30, 2021 by helinlin
9 changed files
--- a/amos-boot-utils/amos-boot-utils-speech/pom.xml
+++ b/amos-boot-utils/amos-boot-utils-speech/pom.xml
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- Maven module holding the Alibaba Cloud speech-recognition demo classes. -->
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+    <parent>
+        <artifactId>amos-boot-utils</artifactId>
+        <groupId>com.amosframework.boot</groupId>
+        <version>1.0.0</version>
+    </parent>
+
+    <artifactId>amos-boot-utils-speech</artifactId>
+
+    <!--
+        NOTE(review): HttpUtil in this module imports okhttp3, which is not declared below.
+        It is presumably supplied transitively by one of these dependencies; confirm, and
+        declare it explicitly if the build relies on it.
+    -->
+    <dependencies>
+        <dependency>
+            <groupId>com.amosframework.boot</groupId>
+            <artifactId>amos-boot-core</artifactId>
+            <version>${amos-biz-boot.version}</version>
+        </dependency>
+        <!-- 1.2.83 fixes the autoType deserialization bypass (CVE-2022-25845) that affects 1.2.73. -->
+        <dependency>
+            <groupId>com.alibaba</groupId>
+            <artifactId>fastjson</artifactId>
+            <version>1.2.83</version>
+        </dependency>
+
+        <!-- Alibaba Cloud core SDK: DefaultAcsClient / CommonRequest used by SpeechRecognizerDemo. -->
+        <dependency>
+            <groupId>com.aliyun</groupId>
+            <artifactId>aliyun-java-sdk-core</artifactId>
+            <version>3.7.1</version>
+        </dependency>
+
+        <!-- Alibaba Cloud NLS SDK: provides AccessToken used by SpeechUtil. -->
+        <dependency>
+            <groupId>com.alibaba.nls</groupId>
+            <artifactId>nls-sdk-transcriber</artifactId>
+            <version>2.2.1</version>
+        </dependency>
+    </dependencies>
+
+</project>
--- a/amos-boot-utils/amos-boot-utils-speech/src/main/java/com/yeejoin/amos/SpeechApplication.java
+++ b/amos-boot-utils/amos-boot-utils-speech/src/main/java/com/yeejoin/amos/SpeechApplication.java
+package com.yeejoin.amos;
+
+import org.springframework.boot.SpringApplication;
+import org.springframework.boot.autoconfigure.SpringBootApplication;
+
+@SpringBootApplication
+public class SpeechApplication {
+
+    public static void main(String[] args) {
+       SpringApplication.run(SpeechApplication.class, args);
+    }
+}
\ No newline at end of file
--- a/amos-boot-utils/amos-boot-utils-speech/src/main/java/com/yeejoin/amos/speech/util/HttpUtil.java
+++ b/amos-boot-utils/amos-boot-utils-speech/src/main/java/com/yeejoin/amos/speech/util/HttpUtil.java
+package com.yeejoin.amos.speech.util;
+
+import okhttp3.*;
+
+import java.io.File;
+import java.io.IOException;
+import java.net.SocketTimeoutException;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.concurrent.TimeUnit;
+
+/**
+ * Small OkHttp-based helper for POSTing a local file as a binary request body.
+ */
+public class HttpUtil {
+
+    /** Shared client: 20 s connect timeout, 60 s read timeout, 60 s write timeout. */
+    static OkHttpClient client = new OkHttpClient
+            .Builder()
+            .connectTimeout(20, TimeUnit.SECONDS)
+            .readTimeout(60, TimeUnit.SECONDS)
+            .writeTimeout(60, TimeUnit.SECONDS)
+            .build();
+
+    /**
+     * POSTs {@code speechFile} to {@code url} with an {@code application/octet-stream} body.
+     *
+     * @param url        target URL
+     * @param headers    extra request headers; may be {@code null} or empty
+     * @param speechFile file to upload
+     * @return the response body as a string, or {@code null} when {@code speechFile} is
+     *         {@code null} or not a regular file, or when the request fails
+     */
+    public static String sendPostFile(String url, HashMap<String, String> headers, File speechFile) {
+        // Guard first: a null reference used to surface as a NullPointerException.
+        if (speechFile == null || !speechFile.isFile()) {
+            System.err.println("The filePath is not a file: "
+                    + (speechFile == null ? null : speechFile.getPath()));
+            return null;
+        }
+        RequestBody body = RequestBody.create(MediaType.parse("application/octet-stream"), speechFile);
+
+        Headers.Builder hb = new Headers.Builder();
+        if (headers != null) {
+            for (Map.Entry<String, String> entry : headers.entrySet()) {
+                hb.add(entry.getKey(), entry.getValue());
+            }
+        }
+        Request request = new Request.Builder()
+                .url(url)
+                .headers(hb.build())
+                .post(body)
+                .build();
+        return getResponseWithTimeout(request);
+    }
+
+    /**
+     * Executes {@code request} synchronously on the shared client.
+     *
+     * @param request the request to execute
+     * @return the response body as a string, or {@code null} on timeout, on any other
+     *         {@link IOException}, or when the response carries no body
+     */
+    private static String getResponseWithTimeout(Request request) {
+        // try-with-resources closes the response even when reading the body throws;
+        // the previous manual close() was skipped on that path and leaked the connection.
+        try (Response response = client.newCall(request).execute()) {
+            ResponseBody responseBody = response.body();
+            // Explicit null check instead of `assert`, which is a no-op unless the JVM runs with -ea.
+            return responseBody != null ? responseBody.string() : null;
+        } catch (SocketTimeoutException e) {
+            System.err.println("get result timeout");
+        } catch (IOException e) {
+            System.err.println("get result error " + e.getMessage());
+        }
+        return null;
+    }
+}
\ No newline at end of file
--- a/amos-boot-utils/amos-boot-utils-speech/src/main/java/com/yeejoin/amos/speech/util/SpeechFlashRecognizerDemo.java
+++ b/amos-boot-utils/amos-boot-utils-speech/src/main/java/com/yeejoin/amos/speech/util/SpeechFlashRecognizerDemo.java
+package com.yeejoin.amos.speech.util;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.HashMap;
+
+/**
+ * Demo of the "flash" edition of recording-file recognition: the audio file itself is
+ * uploaded for recognition, subject to file-size and duration limits.
+ * <p>
+ * Audio formats: AAC, MP3, OPUS and WAV encoded audio.
+ * Limits: files up to 100 MB and no longer than 2 hours.
+ * For files longer than 2 hours use the standard recording-file recognition instead.
+ * <p>
+ * Model type: 8000 (telephone) / 16000 (non-telephone).
+ *
+ * @see SpeechRecognizerDemo
+ */
+public class SpeechFlashRecognizerDemo {
+    private final File speechFile;
+
+    /**
+     * Runs the demo against a local file. The flash edition is not available on the trial plan.
+     *
+     * @param args unused
+     */
+    public static void main(String[] args) {
+        // Local audio file to upload; adjust the path for the machine running the demo.
+        String fileName = "D:\\ffmpeg-4.4-full_build-shared\\bin\\test.mp3";
+        String format = "mp3";
+        int sampleRate = 16000;
+        SpeechFlashRecognizerDemo demo = new SpeechFlashRecognizerDemo(new File(fileName));
+        try {
+            demo.process(SpeechUtil.getToken(), format, sampleRate);
+        } catch (IOException e) {
+            e.printStackTrace();
+        }
+    }
+
+    /**
+     * @param speechFile the audio file that {@link #process} uploads
+     */
+    public SpeechFlashRecognizerDemo(File speechFile) {
+        this.speechFile = speechFile;
+    }
+
+    /**
+     * Uploads the audio file with an HTTPS POST and prints the server response.
+     * <ol>
+     * <li>Protocol: HTTPS</li>
+     * <li>Service host: nls-gateway.cn-shanghai.aliyuncs.com</li>
+     * <li>Request path: /stream/v1/FlashRecognizer</li>
+     * <li>Required query parameters: appkey, token, format, sample_rate</li>
+     * </ol>
+     *
+     * @param token      access token sent as the {@code token} query parameter
+     * @param format     audio format sent as the {@code format} query parameter
+     * @param sampleRate sample rate sent as the {@code sample_rate} query parameter
+     */
+    public void process(String token, String format, int sampleRate) {
+        String prefix = "https://nls-gateway.cn-shanghai.aliyuncs.com/stream/v1/FlashRecognizer"
+                + "?appkey=" + SpeechUtil.AppKey;
+        String suffix = "&format=" + format + "&sample_rate=" + sampleRate;
+        String request = prefix + "&token=" + token + suffix;
+
+        // The token is a credential: print the URL with it masked instead of leaking it to stdout.
+        System.out.println("Request: " + prefix + "&token=***" + suffix);
+
+        // Only header needed: Content-Type: application/octet-stream.
+        HashMap<String, String> headers = new HashMap<>();
+        headers.put("Content-Type", "application/octet-stream");
+
+        // Send the HTTPS POST and time the round trip.
+        long start = System.currentTimeMillis();
+        String response = HttpUtil.sendPostFile(request, headers, this.speechFile);
+        System.out.println("latency = " + (System.currentTimeMillis() - start) + " ms");
+        if (response != null) {
+            System.out.println("Response: " + response);
+        } else {
+            System.err.println("识别失败!");
+        }
+    }
+}
\ No newline at end of file
--- a/amos-boot-utils/amos-boot-utils-speech/src/main/java/com/yeejoin/amos/speech/util/SpeechRecognizerDemo.java
+++ b/amos-boot-utils/amos-boot-utils-speech/src/main/java/com/yeejoin/amos/speech/util/SpeechRecognizerDemo.java
+package com.yeejoin.amos.speech.util;
+
+import com.alibaba.fastjson.JSONObject;
+import com.aliyuncs.CommonRequest;
+import com.aliyuncs.CommonResponse;
+import com.aliyuncs.DefaultAcsClient;
+import com.aliyuncs.IAcsClient;
+import com.aliyuncs.exceptions.ClientException;
+import com.aliyuncs.http.MethodType;
+import com.aliyuncs.profile.DefaultProfile;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.concurrent.TimeUnit;
+
+/**
+ * 录音文件识别普通版（提供外网访问的音频文件地址）
+ * <p>
+ * 识别的文件需要提交基于HTTP可访问的URL地址，可以通过URL访问，不支持提交本地文件
+ * 上传的录音文件URL的访问权限需要设置为公开，URL中只能使用域名不能使用IP地址、不可包含空格
+ *
+ * @see SpeechFlashRecognizerDemo
+ * （内网考虑使用录音文件识别极速版，极速版可以提交文件识别，但极速版不提供试用）
+ * <p>
+ * 支持单轨/双轨的.wav、.mp3、.m4a、.wma、.aac、.ogg、.amr、.flac格式录音文件识别
+ * 文件大小需控制在512 MB以下
+ * 免费用户每日可识别不超过2小时时长的录音文件
+ * 提交录音文件识别请求后，免费用户的识别任务在24小时内完成并返回识别文本； 付费用户的识别任务在6小时内完成并返回识别文本。识别结果在服务端可保存72小时
+ */
+public class SpeechRecognizerDemo {
+    // 地域ID，常量，固定值。
+    public static final String REGIONID = "cn-shanghai";
+    public static final String ENDPOINTNAME = "cn-shanghai";
+    public static final String PRODUCT = "nls-filetrans";
+    public static final String DOMAIN = "filetrans.cn-shanghai.aliyuncs.com";
+    public static final String API_VERSION = "2018-08-17";
+    public static final String POST_REQUEST_ACTION = "SubmitTask";
+    public static final String GET_REQUEST_ACTION = "GetTaskResult";
+
+    // 请求参数
+    public static final String KEY_APP_KEY = "appkey";
+    public static final String KEY_FILE_LINK = "file_link";
+    public static final String KEY_VERSION = "version";
+    public static final String KEY_ENABLE_WORDS = "enable_words";
+
+    // 响应参数
+    public static final String KEY_TASK = "Task";
+    public static final String KEY_TASK_ID = "TaskId";
+    public static final String KEY_STATUS_TEXT = "StatusText";
+    public static final String KEY_RESULT = "Result";
+
+    // 状态值
+    public static final String STATUS_SUCCESS = "SUCCESS";
+    private static final String STATUS_RUNNING = "RUNNING";
+    private static final String STATUS_QUEUEING = "QUEUEING";
+
+    // 阿里云鉴权client
+    IAcsClient client;
+    private static final Logger logger = LoggerFactory.getLogger(SpeechRecognizerDemo.class);
+
+    public static void main(String[] args) throws Exception {
+        String fileLink = "https://gw.alipayobjects.com/os/bmw-prod/0574ee2e-f494-45a5-820f-63aee583045a.wav";
+        SpeechRecognizerDemo demo = new SpeechRecognizerDemo();
+
+        // 第一步：提交录音文件识别请求，获取任务ID用于后续的识别结果轮询。
+        String taskId = demo.submitFileTransRequest(fileLink);
+        if (taskId != null) {
+            logger.info("录音文件识别请求成功，task_id: " + taskId);
+        } else {
+            logger.error("录音文件识别请求失败！");
+            return;
+        }
+
+        // 第二步：根据任务ID轮询识别结果。
+        String result = demo.getFileTransResult(taskId);
+        if (result != null) {
+            logger.info("录音文件识别结果查询成功：" + result);
+        } else {
+            logger.error("录音文件识别结果查询失败！");
+        }
+    }
+
+    public SpeechRecognizerDemo() {
+        // 设置endpoint
+        try {
+            DefaultProfile.addEndpoint(ENDPOINTNAME, REGIONID, PRODUCT, DOMAIN);
+        } catch (ClientException e) {
+            e.printStackTrace();
+        }
+        // 创建DefaultAcsClient实例并初始化
+        DefaultProfile profile = DefaultProfile.getProfile(REGIONID, SpeechUtil.AccessKeId, SpeechUtil.AccessKeySecret);
+        this.client = new DefaultAcsClient(profile);
+    }
+
+    /**
+     * 提交录音文件
+     */
+    public String submitFileTransRequest(String fileLink) {
+        /**
+         * 1. 创建CommonRequest，设置请求参数。
+         */
+        CommonRequest postRequest = new CommonRequest();
+        // 设置域名
+        postRequest.setDomain(DOMAIN);
+        // 设置API的版本号，格式为YYYY-MM-DD。
+        postRequest.setVersion(API_VERSION);
+        // 设置action
+        postRequest.setAction(POST_REQUEST_ACTION);
+        // 设置产品名称
+        postRequest.setProduct(PRODUCT);
+        /**
+         * 2. 设置录音文件识别请求参数，以JSON字符串的格式设置到请求Body中。
+         */
+        JSONObject taskObject = new JSONObject();
+        // 设置appkey
+        taskObject.put(KEY_APP_KEY, SpeechUtil.AppKey);
+        // 设置音频文件访问链接
+        taskObject.put(KEY_FILE_LINK, fileLink);
+        // 新接入请使用4.0版本，已接入（默认2.0）如需维持现状，请注释掉该参数设置。
+        taskObject.put(KEY_VERSION, "4.0");
+        // 设置是否输出词信息，默认为false，开启时需要设置version为4.0及以上。
+        taskObject.put(KEY_ENABLE_WORDS, true);
+        String task = taskObject.toJSONString();
+        logger.info(task);
+        // 设置以上JSON字符串为Body参数。
+        postRequest.putBodyParameter(KEY_TASK, task);
+        // 设置为POST方式的请求。
+        postRequest.setMethod(MethodType.POST);
+        /**
+         * 3. 提交录音文件识别请求，获取录音文件识别请求任务的ID，以供识别结果查询使用。
+         */
+        String taskId = null;
+        try {
+            CommonResponse postResponse = client.getCommonResponse(postRequest);
+            logger.warn("提交录音文件识别请求的响应：" + postResponse.getData());
+            if (postResponse.getHttpStatus() == 200) {
+                JSONObject result = JSONObject.parseObject(postResponse.getData());
+                String statusText = result.getString(KEY_STATUS_TEXT);
+                if (STATUS_SUCCESS.equals(statusText)) {
+                    taskId = result.getString(KEY_TASK_ID);
+                }
+            }
+        } catch (ClientException e) {
+            e.printStackTrace();
+        }
+        return taskId;
+    }
+
+    /**
+     * 根据任务ID轮询识别结果
+     */
+    public String getFileTransResult(String taskId) {
+        /**
+         * 1. 创建CommonRequest，设置任务ID。
+         */
+        CommonRequest getRequest = new CommonRequest();
+        // 设置域名
+        getRequest.setDomain(DOMAIN);
+        // 设置API版本
+        getRequest.setVersion(API_VERSION);
+        // 设置action
+        getRequest.setAction(GET_REQUEST_ACTION);
+        // 设置产品名称
+        getRequest.setProduct(PRODUCT);
+        // 设置任务ID为查询参数
+        getRequest.putQueryParameter(KEY_TASK_ID, taskId);
+        // 设置为GET方式的请求
+        getRequest.setMethod(MethodType.GET);
+        /**
+         * 2. 提交录音文件识别结果查询请求
+         * 以轮询的方式进行识别结果的查询，直到服务端返回的状态描述为“SUCCESS”或错误描述，则结束轮询。
+         */
+        String result = null;
+        while (true) {
+            try {
+                CommonResponse getResponse = client.getCommonResponse(getRequest);
+                logger.warn("识别查询结果：" + getResponse.getData());
+                if (getResponse.getHttpStatus() != 200) {
+                    break;
+                }
+                JSONObject rootObj = JSONObject.parseObject(getResponse.getData());
+                String statusText = rootObj.getString(KEY_STATUS_TEXT);
+                if (STATUS_RUNNING.equals(statusText) || STATUS_QUEUEING.equals(statusText)) {
+                    // 继续轮询，注意设置轮询时间间隔。
+                    TimeUnit.SECONDS.sleep(3);
+                } else {
+                    // 状态信息为成功，返回识别结果；状态信息为异常，返回空。
+                    if (STATUS_SUCCESS.equals(statusText)) {
+                        result = rootObj.getString(KEY_RESULT);
+                        // 状态信息为成功，但没有识别结果，则可能是由于文件里全是静音、噪音等导致识别为空。
+                        if (result == null) {
+                            result = "";
+                        }
+                    }
+                    break;
+                }
+            } catch (Exception e) {
+                e.printStackTrace();
+            }
+        }
+        return result;
+    }
+
+}
--- a/amos-boot-utils/amos-boot-utils-speech/src/main/java/com/yeejoin/amos/speech/util/SpeechTranscriberDemo.java
+++ b/amos-boot-utils/amos-boot-utils-speech/src/main/java/com/yeejoin/amos/speech/util/SpeechTranscriberDemo.java
--- a/amos-boot-utils/amos-boot-utils-speech/src/main/java/com/yeejoin/amos/speech/util/SpeechUtil.java
+++ b/amos-boot-utils/amos-boot-utils-speech/src/main/java/com/yeejoin/amos/speech/util/SpeechUtil.java
+package com.yeejoin.amos.speech.util;
+
+import com.alibaba.nls.client.AccessToken;
+
+import java.io.IOException;
+
+/**
+ * Credentials and token helper for Alibaba Cloud speech recognition.
+ * <p>
+ * Trial edition
+ * <p>
+ * Since 2020-03-01 the free trial rules are as follows:
+ * The trial edition is not billed; watch the official site for changes.
+ * Short-sentence recognition, real-time speech recognition and speech synthesis allow 2
+ * concurrent calls per calendar day with unlimited daily usage; the trial lasts three months.
+ * Recording-file recognition is limited to 2 hours of audio per calendar day; the trial
+ * lasts three months.
+ * Long-text speech synthesis and the flash edition of recording-file recognition are not
+ * available on the trial edition; open the commercial edition to use them.
+ * <p>
+ * Commercial edition
+ * <p>
+ * From 00:00 on 2019-06-10 billing switched automatically to the rules above.
+ * For recording-file recognition, open the commercial edition if you need more than 2 hours
+ * of usage.
+ * The commercial edition is pay-as-you-go by default. After buying a prepaid resource pack
+ * billing becomes prepaid and the pack is consumed first; once it is used up billing
+ * returns to pay-as-you-go.
+ * The commercial edition is billed on usage per calendar day, with no free quota and no
+ * charge when unused. At 24:00 Beijing time each day the system totals that day's usage
+ * and bills it; the bill appears with a slight delay.
+ * Downgrading from commercial back to trial is not recommended: concurrency is set to 0
+ * and the service becomes unusable.
+ */
+public class SpeechUtil {
+    /*
+     * Credentials are read from the environment instead of being committed to source control.
+     * Each value is null when its variable is unset. The keys that used to be hard-coded
+     * here have been published in version control and should be rotated.
+     * Field names (including the "AccessKeId" spelling) are kept because other classes
+     * reference them.
+     */
+    /** AccessKey ID, read from the {@code ALIYUN_AK_ID} environment variable. */
+    public static final String AccessKeId = System.getenv("ALIYUN_AK_ID");
+    /** AccessKey secret, read from the {@code ALIYUN_AK_SECRET} environment variable. */
+    public static final String AccessKeySecret = System.getenv("ALIYUN_AK_SECRET");
+    /** Speech project app key, read from the {@code NLS_APP_KEY} environment variable. */
+    public static final String AppKey = System.getenv("NLS_APP_KEY");
+
+    /**
+     * Requests an access token (the original note states test tokens expire after 18 days).
+     *
+     * @return token
+     * @throws IOException if the credentials are not configured or the token request fails
+     */
+    public static String getToken() throws IOException {
+        // Fail with a clear message rather than sending a request with null credentials.
+        if (AccessKeId == null || AccessKeySecret == null) {
+            throw new IOException(
+                    "Missing credentials: set the ALIYUN_AK_ID and ALIYUN_AK_SECRET environment variables");
+        }
+        AccessToken accessToken = new AccessToken(AccessKeId, AccessKeySecret);
+        accessToken.apply();
+        // The token is a credential, so only its expiry is printed.
+        System.out.println("get token: ***, expire time: " + accessToken.getExpireTime());
+        return accessToken.getToken();
+    }
+}
--- a/amos-boot-utils/amos-boot-utils-speech/src/main/resources/application.properties
+++ b/amos-boot-utils/amos-boot-utils-speech/src/main/resources/application.properties
--- a/amos-boot-utils/pom.xml
+++ b/amos-boot-utils/pom.xml
 <?xml version="1.0" encoding="UTF-8"?>
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
-  <modelVersion>4.0.0</modelVersion>
-  <parent>
-    <artifactId>amos-biz-boot</artifactId>
-    <groupId>com.amosframework.boot</groupId>
-    <version>1.0.0</version>
-  </parent>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+    <parent>
+        <artifactId>amos-biz-boot</artifactId>
+        <groupId>com.amosframework.boot</groupId>
+        <version>1.0.0</version>
+    </parent>


-  <artifactId>amos-boot-utils</artifactId>
-  <packaging>pom</packaging>
+    <artifactId>amos-boot-utils</artifactId>
+    <packaging>pom</packaging>

-  <dependencies>
-  </dependencies>
+    <dependencies>
+    </dependencies>

-  
-  <modules>
-  	<module>amos-boot-utils-jpush</module>
-    <module>amos-boot-utils-video</module>
-  </modules>
+
+    <modules>
+        <module>amos-boot-utils-jpush</module>
+        <module>amos-boot-utils-video</module>
+        <module>amos-boot-utils-speech</module>
+    </modules>
 </project>
\ No newline at end of file