liumaolin committed on
Commit
b6d76bc
·
1 Parent(s): 5f9e92a

Replace `ffmpeg`-based audio loading with `soundfile` and `librosa`

Browse files

- Switch `load_audio` implementation to use `soundfile` and `librosa` for improved compatibility and removal of `ffmpeg` dependency.
- Update index.html title to "Voice Dialogue".
- Standardize spacing in `electron-app/utils.js` and adjust file path for `.version.json`.

assets/www/index.html CHANGED
@@ -4,7 +4,7 @@
4
  <meta charset="UTF-8" />
5
  <link rel="icon" type="image/svg+xml" href="./favicon.ico" />
6
  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
7
- <title>Translator</title>
8
  <script type="module" crossorigin src="./assets/index-54d4b22e.js"></script>
9
  <link rel="stylesheet" href="./assets/index-deecf395.css">
10
  </head>
 
4
  <meta charset="UTF-8" />
5
  <link rel="icon" type="image/svg+xml" href="./favicon.ico" />
6
  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
7
+ <title>Voice Dialogue</title>
8
  <script type="module" crossorigin src="./assets/index-54d4b22e.js"></script>
9
  <link rel="stylesheet" href="./assets/index-deecf395.css">
10
  </head>
electron-app/utils.js CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c4bac68a368fb5234d18984394771063d8bda786db8cd33f256a9e285e2dfe7d
3
- size 6790
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2a63d51ebee36b6cd63b3943b348a3226e38828e5cb5a13c94742776b1eb787
3
+ size 7350
scripts/build.sh CHANGED
File without changes
third_party/moyoyo_tts/tools/my_utils.py CHANGED
@@ -1,34 +1,60 @@
1
  import os
2
  import traceback
3
 
4
- import ffmpeg
5
  import numpy as np
 
 
6
 
7
  from moyoyo_tts.tools.i18n.i18n import I18nAuto
8
 
9
  i18n = I18nAuto(language=os.environ.get('language', 'Auto'))
10
 
11
-
12
  def load_audio(file, sr):
13
  try:
14
- # https://github.com/openai/whisper/blob/main/whisper/audio.py#L26
15
- # This launches a subprocess to decode audio while down-mixing and resampling as necessary.
16
- # Requires the ffmpeg CLI and `ffmpeg-python` package to be installed.
17
  file = clean_path(file) # 防止小白拷路径头尾带了空格和"和回车
18
  if os.path.exists(file) == False:
19
  raise RuntimeError(
20
  "You input a wrong audio path that does not exists, please fix it!"
21
  )
22
- out, _ = (
23
- ffmpeg.input(file, threads=0)
24
- .output("-", format="f32le", acodec="pcm_f32le", ac=1, ar=sr)
25
- .run(cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True)
26
- )
 
 
 
 
 
 
 
 
 
27
  except Exception as e:
28
  traceback.print_exc()
29
  raise RuntimeError(i18n("音频加载失败"))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
 
31
- return np.frombuffer(out, np.float32).flatten()
32
 
33
 
34
  def clean_path(path_str: str):
 
1
  import os
2
  import traceback
3
 
4
+ # import ffmpeg
5
  import numpy as np
6
+ import soundfile as sf
7
+ import librosa
8
 
9
  from moyoyo_tts.tools.i18n.i18n import I18nAuto
10
 
11
  i18n = I18nAuto(language=os.environ.get('language', 'Auto'))
12
 
 
13
  def load_audio(file, sr):
14
  try:
 
 
 
15
  file = clean_path(file) # 防止小白拷路径头尾带了空格和"和回车
16
  if os.path.exists(file) == False:
17
  raise RuntimeError(
18
  "You input a wrong audio path that does not exists, please fix it!"
19
  )
20
+
21
+ # 使用soundfile读取音频文件
22
+ data, original_sr = sf.read(file, dtype='float32')
23
+
24
+ # 如果是多声道,转换为单声道(取平均值)
25
+ if len(data.shape) > 1:
26
+ data = np.mean(data, axis=1)
27
+
28
+ # 如果采样率不匹配,进行重采样
29
+ if original_sr != sr:
30
+ data = librosa.resample(data, orig_sr=original_sr, target_sr=sr)
31
+
32
+ return data.flatten()
33
+
34
  except Exception as e:
35
  traceback.print_exc()
36
  raise RuntimeError(i18n("音频加载失败"))
37
+
38
+ # def load_audio(file, sr):
39
+ # try:
40
+ # # https://github.com/openai/whisper/blob/main/whisper/audio.py#L26
41
+ # # This launches a subprocess to decode audio while down-mixing and resampling as necessary.
42
+ # # Requires the ffmpeg CLI and `ffmpeg-python` package to be installed.
43
+ # file = clean_path(file) # 防止小白拷路径头尾带了空格和"和回车
44
+ # if os.path.exists(file) == False:
45
+ # raise RuntimeError(
46
+ # "You input a wrong audio path that does not exists, please fix it!"
47
+ # )
48
+ # out, _ = (
49
+ # ffmpeg.input(file, threads=0)
50
+ # .output("-", format="f32le", acodec="pcm_f32le", ac=1, ar=sr)
51
+ # .run(cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True)
52
+ # )
53
+ # except Exception as e:
54
+ # traceback.print_exc()
55
+ # raise RuntimeError(i18n("音频加载失败"))
56
 
57
+ # return np.frombuffer(out, np.float32).flatten()
58
 
59
 
60
  def clean_path(path_str: str):