rcell committed on
Commit
fe5b963
·
1 Parent(s): cb3140f

update advance

Browse files
Files changed (1) hide show
  1. app.py +20 -16
app.py CHANGED
@@ -55,6 +55,8 @@ _ = utils.load_checkpoint("output.pth", net_g_ms, None)
55
 
56
 
57
  def tts(text):
 
 
58
  sid = torch.LongTensor([2]) # speaker identity
59
  stn_tst = get_text(text, hps_ms)
60
 
@@ -64,12 +66,14 @@ def tts(text):
64
  # print(stn_tst.size())
65
  audio = net_g_ms.infer(x_tst, x_tst_lengths, sid=sid, noise_scale=.667, noise_scale_w=0.8, length_scale=1)[0][
66
  0, 0].data.float().numpy()
67
- return (hps.data.sampling_rate, audio)
68
 
69
  def clean_text(text):
70
  return japanese_cleaners(text)
71
 
72
  def generate_from_clean(text):
 
 
73
  sid = torch.LongTensor([2]) # speaker identity
74
 
75
  text_norm = cleaned_text_to_sequence(text)
@@ -82,25 +86,25 @@ def generate_from_clean(text):
82
  x_tst_lengths = torch.LongTensor([stn_tst.size(0)])
83
  audio = net_g_ms.infer(x_tst, x_tst_lengths, sid=sid, noise_scale=.667, noise_scale_w=0.8, length_scale=1)[0][
84
  0, 0].data.float().numpy()
85
- return (hps.data.sampling_rate, audio)
86
  app = gr.Blocks()
87
  with app:
88
  with gr.Tabs():
89
- with gr.TabItem("基本"):
90
- tts_input1 = gr.TextArea(label="请输入日语文本", value="こんにちは。")
91
  # tts_input2 = gr.Dropdown(label="Speaker", choices=hps.speakers, type="index", value=hps.speakers[0])
92
- tts_submit = gr.Button("生成", variant="primary")
93
- # tts_output1 = gr.Textbox(label="Output Message")
94
- tts_output2 = gr.Audio(label="输出")
95
- tts_submit.click(tts, [tts_input1], [tts_output2])
96
- with gr.TabItem("高级"):
97
- tts_input3 = gr.TextArea(label="请输入日语文本", value="こんにちは。")
98
- tts_s1 = gr.Button("清理", variant="primary")
99
- tts_input4 = gr.TextArea(label="调整调形", value="ko↑Nniʧiwa.")
100
- tts_s2 = gr.Button("生成", variant="primary")
101
-
102
- tts_o = gr.Audio(label="输出")
103
  tts_s1.click(clean_text, [tts_input3], [ tts_input4])
104
- tts_s2.click(generate_from_clean, [tts_input4], [tts_o])
105
 
106
  app.launch()
 
55
 
56
 
57
  def tts(text):
58
+ if len(text) > 150:
59
+ return "Error: Text is too long", None
60
  sid = torch.LongTensor([2]) # speaker identity
61
  stn_tst = get_text(text, hps_ms)
62
 
 
66
  # print(stn_tst.size())
67
  audio = net_g_ms.infer(x_tst, x_tst_lengths, sid=sid, noise_scale=.667, noise_scale_w=0.8, length_scale=1)[0][
68
  0, 0].data.float().numpy()
69
+ return "Success", (hps.data.sampling_rate, audio)
70
 
71
  def clean_text(text):
72
  return japanese_cleaners(text)
73
 
74
  def generate_from_clean(text):
75
+ if len(text) > 300:
76
+ return "Error: Text is too long", None
77
  sid = torch.LongTensor([2]) # speaker identity
78
 
79
  text_norm = cleaned_text_to_sequence(text)
 
86
  x_tst_lengths = torch.LongTensor([stn_tst.size(0)])
87
  audio = net_g_ms.infer(x_tst, x_tst_lengths, sid=sid, noise_scale=.667, noise_scale_w=0.8, length_scale=1)[0][
88
  0, 0].data.float().numpy()
89
+ return "Success", (hps.data.sampling_rate, audio)
90
  app = gr.Blocks()
91
  with app:
92
  with gr.Tabs():
93
+ with gr.TabItem("Basic"):
94
+ tts_input1 = gr.TextArea(label="Text in Japanese (150 words limitation)", value="こんにちは。")
95
  # tts_input2 = gr.Dropdown(label="Speaker", choices=hps.speakers, type="index", value=hps.speakers[0])
96
+ tts_submit = gr.Button("Generate", variant="primary")
97
+ tts_output1 = gr.Textbox(label="Message")
98
+ tts_output2 = gr.Audio(label="Output")
99
+ tts_submit.click(tts, [tts_input1], [tts_output1, tts_output2])
100
+ with gr.TabItem("Advanced"):
101
+ tts_input3 = gr.TextArea(label="Text in Japanese", value="こんにちは。")
102
+ tts_s1 = gr.Button("Clean", variant="primary")
103
+ tts_input4 = gr.TextArea(label="Cleaned Text (300 words limitation)", value="ko↑Nniʧiwa.")
104
+ tts_s2 = gr.Button("Generate", variant="primary")
105
+ message = gr.Textbox(label="Message")
106
+ tts_o = gr.Audio(label="Output")
107
  tts_s1.click(clean_text, [tts_input3], [ tts_input4])
108
+ tts_s2.click(generate_from_clean, [tts_input4], [message, tts_o])
109
 
110
  app.launch()