Spaces:
Running
Running
update advance
Browse files
app.py
CHANGED
@@ -55,6 +55,8 @@ _ = utils.load_checkpoint("output.pth", net_g_ms, None)
|
|
55 |
|
56 |
|
57 |
def tts(text):
|
|
|
|
|
58 |
sid = torch.LongTensor([2]) # speaker identity
|
59 |
stn_tst = get_text(text, hps_ms)
|
60 |
|
@@ -64,12 +66,14 @@ def tts(text):
|
|
64 |
# print(stn_tst.size())
|
65 |
audio = net_g_ms.infer(x_tst, x_tst_lengths, sid=sid, noise_scale=.667, noise_scale_w=0.8, length_scale=1)[0][
|
66 |
0, 0].data.float().numpy()
|
67 |
-
return (hps.data.sampling_rate, audio)
|
68 |
|
69 |
def clean_text(text):
    """Return ``text`` after passing it through ``japanese_cleaners``.

    Used as a Gradio click handler: it receives the contents of one text
    area and its return value is written to another.
    """
    cleaned = japanese_cleaners(text)
    return cleaned
|
71 |
|
72 |
def generate_from_clean(text):
|
|
|
|
|
73 |
sid = torch.LongTensor([2]) # speaker identity
|
74 |
|
75 |
text_norm = cleaned_text_to_sequence(text)
|
@@ -82,25 +86,25 @@ def generate_from_clean(text):
|
|
82 |
x_tst_lengths = torch.LongTensor([stn_tst.size(0)])
|
83 |
audio = net_g_ms.infer(x_tst, x_tst_lengths, sid=sid, noise_scale=.667, noise_scale_w=0.8, length_scale=1)[0][
|
84 |
0, 0].data.float().numpy()
|
85 |
-
return (hps.data.sampling_rate, audio)
|
86 |
app = gr.Blocks()
|
87 |
with app:
|
88 |
with gr.Tabs():
|
89 |
-
with gr.TabItem("
|
90 |
-
tts_input1 = gr.TextArea(label="
|
91 |
# tts_input2 = gr.Dropdown(label="Speaker", choices=hps.speakers, type="index", value=hps.speakers[0])
|
92 |
-
tts_submit = gr.Button("
|
93 |
-
|
94 |
-
tts_output2 = gr.Audio(label="
|
95 |
-
tts_submit.click(tts, [tts_input1], [tts_output2])
|
96 |
-
with gr.TabItem("
|
97 |
-
tts_input3 = gr.TextArea(label="
|
98 |
-
tts_s1 = gr.Button("
|
99 |
-
tts_input4 = gr.TextArea(label="
|
100 |
-
tts_s2 = gr.Button("
|
101 |
-
|
102 |
-
tts_o = gr.Audio(label="
|
103 |
tts_s1.click(clean_text, [tts_input3], [ tts_input4])
|
104 |
-
tts_s2.click(generate_from_clean, [tts_input4], [tts_o])
|
105 |
|
106 |
app.launch()
|
|
|
55 |
|
56 |
|
57 |
def tts(text):
|
58 |
+
if len(text) > 150:
|
59 |
+
return "Error: Text is too long", None
|
60 |
sid = torch.LongTensor([2]) # speaker identity
|
61 |
stn_tst = get_text(text, hps_ms)
|
62 |
|
|
|
66 |
# print(stn_tst.size())
|
67 |
audio = net_g_ms.infer(x_tst, x_tst_lengths, sid=sid, noise_scale=.667, noise_scale_w=0.8, length_scale=1)[0][
|
68 |
0, 0].data.float().numpy()
|
69 |
+
return "Success", (hps.data.sampling_rate, audio)
|
70 |
|
71 |
def clean_text(text):
    """Return ``text`` after passing it through ``japanese_cleaners``.

    Bound to the "Clean" button of the "Advanced" tab: the raw Japanese
    input is converted here and the result fills the "Cleaned Text" area.
    """
    cleaned = japanese_cleaners(text)
    return cleaned
|
73 |
|
74 |
def generate_from_clean(text):
|
75 |
+
if len(text) > 300:
|
76 |
+
return "Error: Text is too long", None
|
77 |
sid = torch.LongTensor([2]) # speaker identity
|
78 |
|
79 |
text_norm = cleaned_text_to_sequence(text)
|
|
|
86 |
x_tst_lengths = torch.LongTensor([stn_tst.size(0)])
|
87 |
audio = net_g_ms.infer(x_tst, x_tst_lengths, sid=sid, noise_scale=.667, noise_scale_w=0.8, length_scale=1)[0][
|
88 |
0, 0].data.float().numpy()
|
89 |
+
return "Success", (hps.data.sampling_rate, audio)
|
90 |
# Gradio UI: two tabs sharing the same model.
#   * "Basic"    - raw Japanese text -> tts() -> (status message, audio)
#   * "Advanced" - raw text -> clean_text() -> editable cleaned text
#                  -> generate_from_clean() -> (status message, audio)
# Both synthesis handlers return a (message, audio) pair, so each click is
# wired to a Textbox and an Audio component, in that order.
with gr.Blocks() as app:
    with gr.Tabs():
        with gr.TabItem("Basic"):
            # tts() rejects input when len(text) > 150, i.e. the limit is
            # counted in characters, so the label says "characters".
            tts_input1 = gr.TextArea(
                label="Text in Japanese (150 characters limitation)",
                value="こんにちは。",
            )
            # tts_input2 = gr.Dropdown(label="Speaker", choices=hps.speakers, type="index", value=hps.speakers[0])
            tts_submit = gr.Button("Generate", variant="primary")
            tts_output1 = gr.Textbox(label="Message")
            tts_output2 = gr.Audio(label="Output")
            tts_submit.click(tts, [tts_input1], [tts_output1, tts_output2])
        with gr.TabItem("Advanced"):
            tts_input3 = gr.TextArea(label="Text in Japanese", value="こんにちは。")
            tts_s1 = gr.Button("Clean", variant="primary")
            # generate_from_clean() rejects input when len(text) > 300
            # (characters of the cleaned text, not words).
            tts_input4 = gr.TextArea(
                label="Cleaned Text (300 characters limitation)",
                value="ko↑Nniʧiwa.",
            )
            tts_s2 = gr.Button("Generate", variant="primary")
            message = gr.Textbox(label="Message")
            tts_o = gr.Audio(label="Output")
            # Step 1: fill the cleaned-text area from the raw input.
            tts_s1.click(clean_text, [tts_input3], [tts_input4])
            # Step 2: synthesize from the (possibly hand-edited) cleaned text.
            tts_s2.click(generate_from_clean, [tts_input4], [message, tts_o])

app.launch()
|