语音融合代码修改

8b4e795a · chenzhao · a7434fbd · 8b4e795a · 8b4e795a · 8b4e795a
Commit 8b4e795a authored Jun 15, 2023 by chenzhao
3 changed files
--- a/amos-boot-module/amos-boot-module-biz/amos-boot-module-jcs-biz/src/main/java/com/yeejoin/amos/boot/module/jcs/biz/audioToText/SocketClient.java
+++ b/amos-boot-module/amos-boot-module-biz/amos-boot-module-jcs-biz/src/main/java/com/yeejoin/amos/boot/module/jcs/biz/audioToText/SocketClient.java
@@ -20,14 +20,14 @@ import static com.yeejoin.amos.boot.module.jcs.biz.audioToText.util.SpeechTransc
 public class SocketClient {
    private static final Logger logger = LoggerFactory.getLogger(SocketClient.class);
    private static final String[] testFilePath = {
-            "C:\\Users\\DELL\\Desktop\\yuyin\\out1.pcm",
+            "C:\\Users\\DELL\\Desktop\\ffmpeg-4.4-full_build-shared\\bin\\202306158000.pcm",
-            "C:\\Users\\DELL\\Desktop\\yuyin\\out.pcm",
+            "C:\\Users\\DELL\\Desktop\\ffmpeg-4.4-full_build-shared\\bin\\202306158000.pcm",
    };
    public static void main(String[] args) throws SocketException {
        SocketClient socketClient = new SocketClient();
        //socketClient.processTcp(0, 0);
-        socketClient.processUdp(25002, 1);
+        socketClient.processUdp(25001, 1);
    }
    @Async
@@ -36,8 +36,8 @@ public class SocketClient {
        if (type >= testFilePath.length) type -= 1;
        DatagramSocket datagramSocket = new DatagramSocket();
        try {
-            FileInputStream fis = new FileInputStream(new File("C:\\Users\\DELL\\Desktop\\ffmpeg-4.4-full_build-shared\\bin\\out.pcm"));
+            FileInputStream fis = new FileInputStream(new File("C:\\Users\\DELL\\Desktop\\ffmpeg-4.4-full_build-shared\\bin\\202306158000.pcm"));
-            byte[] b = new byte[1280];
+            byte[] b = new byte[320];
            int len;
            while ((len = fis.read(b)) > 0) {
                logger.info("send data pack length: " + len);
@@ -62,7 +62,7 @@ public class SocketClient {
            socket.connect(new InetSocketAddress(InetAddress.getLocalHost().getHostAddress(), port));
            OutputStream outputStream = socket.getOutputStream();
            FileInputStream fis = new FileInputStream(new File(testFilePath[type]));
-            byte[] b = new byte[4096];
+            byte[] b = new byte[320];
            int len;
            while ((len = fis.read(b)) > 0) {
                logger.info("send data pack length: " + len);

--- a/amos-boot-system-jcs/src/main/resources/application.properties
+++ b/amos-boot-system-jcs/src/main/resources/application.properties
@@ -143,6 +143,10 @@ management.security.enabled=false
 management.endpoint.health.show-details=always
 management.endpoints.web.exposure.include=*
+speech-config.access-key-id=LTAI5t62oH95jgbjRiNXPsho
+speech-config.access-key-secret=shy9SpogYgcdDoyTB3bvP21VSRmz8n
+speech-config.app-key=FC84bGUpbNFrexoL
 ##代码中有部分逻辑冲突需要处理  为区分机场和电力逻辑 增加开关 若为true 则为机场逻辑 为false 则为电力逻辑
 logic=true

--- a/amos-boot-utils/amos-boot-utils-speech/src/main/java/com/yeejoin/amos/speech/AppSpeechTranscriber.java
+++ b/amos-boot-utils/amos-boot-utils-speech/src/main/java/com/yeejoin/amos/speech/AppSpeechTranscriber.java
@@ -61,8 +61,8 @@ public class AppSpeechTranscriber {
        SpeechTranscriber transcriber = null;
        try {
            //创建实例、建立连接。
-            //byte[] b = new byte[332];
+            byte[] b = new byte[332];
-            byte[] b = new byte[320];
+            //byte[] b = new byte[320];
            DatagramPacket datagramPacket = new DatagramPacket(b, b.length);
            logger.warn("serverSocket已启动，地址：" + localIpAddress
                    + "监听端口：" + serverSocket.getLocalPort() + "  等待语音融合系统推送数据...");
@@ -79,8 +79,8 @@ public class AppSpeechTranscriber {
   //             logger.warn("收到数据包：" + b.length);
                //去掉前12个字节的rtp包头，后面的320字节为语音数据
                //4秒未再次调用此方法，阿里云会抛出超时异常
-  //              transcriber.send(Arrays.copyOfRange(b, 12, b.length));
+                transcriber.send(Arrays.copyOfRange(b, 12, b.length));
-                transcriber.send(b);
+                //transcriber.send(b);
            }
        } catch (Exception e) {
            logger.error(e.getMessage());
@@ -110,9 +110,9 @@ public class AppSpeechTranscriber {
        //输入音频编码方式。
        transcriber.setFormat(InputFormatEnum.PCM);
        //输入音频采样率。
-        transcriber.setSampleRate(SampleRateEnum.SAMPLE_RATE_16K);
+        transcriber.setSampleRate(SampleRateEnum.SAMPLE_RATE_8K);
        //是否返回中间识别结果。
-        transcriber.setEnableIntermediateResult(true);
+        transcriber.setEnableIntermediateResult(false);
        //是否生成并返回标点符号。
        transcriber.setEnablePunctuation(false);
        //是否将返回结果规整化，比如将一百返回为100。
@@ -125,12 +125,12 @@ public class AppSpeechTranscriber {
        //设置是否开启顺滑。
        //transcriber.addCustomedParam("disfluency",true);
        //设置是否开启词模式。
-        transcriber.addCustomedParam("enable_words",true);
+        //transcriber.addCustomedParam("enable_words",false);
        //设置vad噪音阈值参数，参数取值为-1～+1，如-0.9、-0.8、0.2、0.9。
        //取值越趋于-1，判定为语音的概率越大，亦即有可能更多噪声被当成语音被误识别。
        //取值越趋于+1，判定为噪音的越多，亦即有可能更多语音段被当成噪音被拒绝识别。
        //该参数属高级参数，调整需慎重和重点测试。
-        transcriber.addCustomedParam("speech_noise_threshold",0.5);
+        //transcriber.addCustomedParam("speech_noise_threshold",0.5);
        //设置训练后的定制语言模型id。
        //transcriber.addCustomedParam("customization_id","你的定制语言模型id");
        //设置训练后的定制热词id。