Replace `ffmpeg`-based audio loading with `soundfile` and `librosa`

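- Switch the `load_audio` implementation to `soundfile` (decoding) and `librosa` (resampling) for improved compatibility, removing the dependency on the `ffmpeg` CLI and the `ffmpeg-python` package. Multi-channel audio is down-mixed to mono by averaging channels, and resampling is performed only when the file's sample rate differs from the requested `sr`.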
- Update index.html title to "Voice Dialogue".
- Standardize spacing in `electron-app/utils.js` and adjust file path for `.version.json`.

Files changed (4) hide show

assets/www/index.html +1 -1
electron-app/utils.js +2 -2
scripts/build.sh +0 -0
third_party/moyoyo_tts/tools/my_utils.py +37 -11

assets/www/index.html CHANGED Viewed

@@ -4,7 +4,7 @@
     <meta charset="UTF-8" />
     <link rel="icon" type="image/svg+xml" href="./favicon.ico" />
     <meta name="viewport" content="width=device-width, initial-scale=1.0" />
-    <title>Translator</title>
     <script type="module" crossorigin src="./assets/index-54d4b22e.js"></script>
     <link rel="stylesheet" href="./assets/index-deecf395.css">
   </head>

     <meta charset="UTF-8" />
     <link rel="icon" type="image/svg+xml" href="./favicon.ico" />
     <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+    <title>Voice Dialogue</title>
     <script type="module" crossorigin src="./assets/index-54d4b22e.js"></script>
     <link rel="stylesheet" href="./assets/index-deecf395.css">
   </head>

electron-app/utils.js CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c4bac68a368fb5234d18984394771063d8bda786db8cd33f256a9e285e2dfe7d
-size 6790

 version https://git-lfs.github.com/spec/v1
+oid sha256:f2a63d51ebee36b6cd63b3943b348a3226e38828e5cb5a13c94742776b1eb787
+size 7350

scripts/build.sh CHANGED Viewed

File without changes

third_party/moyoyo_tts/tools/my_utils.py CHANGED Viewed

@@ -1,34 +1,60 @@
 import os
 import traceback
-import ffmpeg
 import numpy as np
 from moyoyo_tts.tools.i18n.i18n import I18nAuto
 i18n = I18nAuto(language=os.environ.get('language', 'Auto'))
 def load_audio(file, sr):
     try:
-        # https://github.com/openai/whisper/blob/main/whisper/audio.py#L26
-        # This launches a subprocess to decode audio while down-mixing and resampling as necessary.
-        # Requires the ffmpeg CLI and `ffmpeg-python` package to be installed.
         file = clean_path(file)  # 防止小白拷路径头尾带了空格和"和回车
         if os.path.exists(file) == False:
             raise RuntimeError(
                 "You input a wrong audio path that does not exists, please fix it!"
             )
-        out, _ = (
-            ffmpeg.input(file, threads=0)
-            .output("-", format="f32le", acodec="pcm_f32le", ac=1, ar=sr)
-            .run(cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True)
-        )
     except Exception as e:
         traceback.print_exc()
         raise RuntimeError(i18n("音频加载失败"))
-    return np.frombuffer(out, np.float32).flatten()
 def clean_path(path_str: str):

 import os
 import traceback
+# import ffmpeg
 import numpy as np
+import soundfile as sf
+import librosa
 from moyoyo_tts.tools.i18n.i18n import I18nAuto
 i18n = I18nAuto(language=os.environ.get('language', 'Auto'))
 def load_audio(file, sr):
     try:
         file = clean_path(file)  # 防止小白拷路径头尾带了空格和"和回车
         if os.path.exists(file) == False:
             raise RuntimeError(
                 "You input a wrong audio path that does not exists, please fix it!"
             )
+        # 使用soundfile读取音频文件
+        data, original_sr = sf.read(file, dtype='float32')
+        # 如果是多声道，转换为单声道（取平均值）
+        if len(data.shape) > 1:
+            data = np.mean(data, axis=1)
+        # 如果采样率不匹配，进行重采样
+        if original_sr != sr:
+            data = librosa.resample(data, orig_sr=original_sr, target_sr=sr)
+        return data.flatten()
     except Exception as e:
         traceback.print_exc()
         raise RuntimeError(i18n("音频加载失败"))
+# def load_audio(file, sr):
+#     try:
+#         # https://github.com/openai/whisper/blob/main/whisper/audio.py#L26
+#         # This launches a subprocess to decode audio while down-mixing and resampling as necessary.
+#         # Requires the ffmpeg CLI and `ffmpeg-python` package to be installed.
+#         file = clean_path(file)  # 防止小白拷路径头尾带了空格和"和回车
+#         if os.path.exists(file) == False:
+#             raise RuntimeError(
+#                 "You input a wrong audio path that does not exists, please fix it!"
+#             )
+#         out, _ = (
+#             ffmpeg.input(file, threads=0)
+#             .output("-", format="f32le", acodec="pcm_f32le", ac=1, ar=sr)
+#             .run(cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True)
+#         )
+#     except Exception as e:
+#         traceback.print_exc()
+#         raise RuntimeError(i18n("音频加载失败"))
+#     return np.frombuffer(out, np.float32).flatten()
 def clean_path(path_str: str):