liumaolin
committed on
Commit
·
b6d76bc
1
Parent(s):
5f9e92a
Replace `ffmpeg`-based audio loading with `soundfile` and `librosa`
Browse files
- Switch `load_audio` implementation to use `soundfile` and `librosa` for improved compatibility and removal of `ffmpeg` dependency.
- Update index.html title to "Voice Dialogue".
- Standardize spacing in `electron-app/utils.js` and adjust file path for `.version.json`.
- assets/www/index.html +1 -1
- electron-app/utils.js +2 -2
- scripts/build.sh +0 -0
- third_party/moyoyo_tts/tools/my_utils.py +37 -11
assets/www/index.html
CHANGED
@@ -4,7 +4,7 @@
|
|
4 |
<meta charset="UTF-8" />
|
5 |
<link rel="icon" type="image/svg+xml" href="./favicon.ico" />
|
6 |
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
7 |
-
<title>
|
8 |
<script type="module" crossorigin src="./assets/index-54d4b22e.js"></script>
|
9 |
<link rel="stylesheet" href="./assets/index-deecf395.css">
|
10 |
</head>
|
|
|
4 |
<meta charset="UTF-8" />
|
5 |
<link rel="icon" type="image/svg+xml" href="./favicon.ico" />
|
6 |
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
7 |
+
<title>Voice Dialogue</title>
|
8 |
<script type="module" crossorigin src="./assets/index-54d4b22e.js"></script>
|
9 |
<link rel="stylesheet" href="./assets/index-deecf395.css">
|
10 |
</head>
|
electron-app/utils.js
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f2a63d51ebee36b6cd63b3943b348a3226e38828e5cb5a13c94742776b1eb787
|
3 |
+
size 7350
|
scripts/build.sh
CHANGED
File without changes
|
third_party/moyoyo_tts/tools/my_utils.py
CHANGED
@@ -1,34 +1,60 @@
|
|
1 |
import os
|
2 |
import traceback
|
3 |
|
4 |
-
import ffmpeg
|
5 |
import numpy as np
|
|
|
|
|
6 |
|
7 |
from moyoyo_tts.tools.i18n.i18n import I18nAuto
|
8 |
|
9 |
i18n = I18nAuto(language=os.environ.get('language', 'Auto'))
|
10 |
|
11 |
-
|
12 |
def load_audio(file, sr):
|
13 |
try:
|
14 |
-
# https://github.com/openai/whisper/blob/main/whisper/audio.py#L26
|
15 |
-
# This launches a subprocess to decode audio while down-mixing and resampling as necessary.
|
16 |
-
# Requires the ffmpeg CLI and `ffmpeg-python` package to be installed.
|
17 |
file = clean_path(file) # 防止小白拷路径头尾带了空格和"和回车
|
18 |
if os.path.exists(file) == False:
|
19 |
raise RuntimeError(
|
20 |
"You input a wrong audio path that does not exists, please fix it!"
|
21 |
)
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
except Exception as e:
|
28 |
traceback.print_exc()
|
29 |
raise RuntimeError(i18n("音频加载失败"))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
|
31 |
-
|
32 |
|
33 |
|
34 |
def clean_path(path_str: str):
|
|
|
1 |
import os
|
2 |
import traceback
|
3 |
|
4 |
+
# import ffmpeg
|
5 |
import numpy as np
|
6 |
+
import soundfile as sf
|
7 |
+
import librosa
|
8 |
|
9 |
from moyoyo_tts.tools.i18n.i18n import I18nAuto
|
10 |
|
11 |
i18n = I18nAuto(language=os.environ.get('language', 'Auto'))
|
12 |
|
|
|
13 |
def load_audio(file, sr):
    """Load an audio file as a mono waveform at the requested sample rate.

    The file is decoded with ``soundfile`` (requesting float32 samples).
    Multi-channel audio is down-mixed by averaging the channels, and
    ``librosa`` resamples the signal when the file's native sample rate
    differs from ``sr``.

    Args:
        file: Path to the audio file. It is passed through ``clean_path``
            first, so a path pasted with stray leading/trailing spaces,
            quotes or newlines is tolerated.
        sr: Target sample rate.

    Returns:
        A 1-D NumPy array holding the (possibly down-mixed and resampled)
        samples.

    Raises:
        RuntimeError: If the path does not exist, or the audio cannot be
            decoded or resampled. The underlying error is attached as
            ``__cause__`` so callers can inspect the real failure.
    """
    try:
        # Guard against paths copied with surrounding spaces, quotes or newlines.
        file = clean_path(file)
        if not os.path.exists(file):
            raise RuntimeError(
                "You input a wrong audio path that does not exists, please fix it!"
            )

        # Decode the audio file with soundfile.
        data, original_sr = sf.read(file, dtype='float32')

        # More than one dimension means multiple channels: average to mono.
        if data.ndim > 1:
            data = np.mean(data, axis=1)

        # Resample only when the file's rate differs from the requested one.
        if original_sr != sr:
            data = librosa.resample(data, orig_sr=original_sr, target_sr=sr)

        return data.flatten()
    except Exception as e:
        # Print the full traceback for the console, then surface a localized
        # message while keeping the original exception chained for debugging.
        traceback.print_exc()
        raise RuntimeError(i18n("音频加载失败")) from e
|
37 |
+
|
38 |
+
# def load_audio(file, sr):
|
39 |
+
# try:
|
40 |
+
# # https://github.com/openai/whisper/blob/main/whisper/audio.py#L26
|
41 |
+
# # This launches a subprocess to decode audio while down-mixing and resampling as necessary.
|
42 |
+
# # Requires the ffmpeg CLI and `ffmpeg-python` package to be installed.
|
43 |
+
# file = clean_path(file) # 防止小白拷路径头尾带了空格和"和回车
|
44 |
+
# if os.path.exists(file) == False:
|
45 |
+
# raise RuntimeError(
|
46 |
+
# "You input a wrong audio path that does not exists, please fix it!"
|
47 |
+
# )
|
48 |
+
# out, _ = (
|
49 |
+
# ffmpeg.input(file, threads=0)
|
50 |
+
# .output("-", format="f32le", acodec="pcm_f32le", ac=1, ar=sr)
|
51 |
+
# .run(cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True)
|
52 |
+
# )
|
53 |
+
# except Exception as e:
|
54 |
+
# traceback.print_exc()
|
55 |
+
# raise RuntimeError(i18n("音频加载失败"))
|
56 |
|
57 |
+
# return np.frombuffer(out, np.float32).flatten()
|
58 |
|
59 |
|
60 |
def clean_path(path_str: str):
|