Spaces:
Paused
Paused
Xingde Jiang
committed on
Commit
·
877c60c
1
Parent(s):
63e488a
add text, audio, video capability
Browse files- app.py +868 -61
- audios/tempfile.mp3 +0 -0
- azure_utils.py +155 -0
- images/humancare.jpg +0 -0
- polly_utils.py +635 -0
- requirements.txt +10 -1
- videos/humancare.mp4 +0 -0
app.py
CHANGED
@@ -14,6 +14,28 @@ Original file is located at
|
|
14 |
# !pip install datasets
|
15 |
|
16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
import gradio as gr
|
18 |
from sentence_transformers import SentenceTransformer, CrossEncoder, util
|
19 |
from torch import tensor as torch_tensor
|
@@ -22,9 +44,9 @@ from datasets import load_dataset
|
|
22 |
"""# import models"""
|
23 |
|
24 |
bi_encoder = SentenceTransformer('multi-qa-MiniLM-L6-cos-v1')
|
25 |
-
bi_encoder.max_seq_length = 256
|
26 |
|
27 |
-
#The bi-encoder will retrieve top_k documents. We use a cross-encoder, to re-rank the results list to improve the quality
|
28 |
cross_encoder = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')
|
29 |
|
30 |
"""# import datasets"""
|
@@ -35,75 +57,860 @@ dataset_embed = load_dataset("gfhayworth/wiki_mini_embed", split='train')
|
|
35 |
dataset_embed_pd = dataset_embed.to_pandas()
|
36 |
mycorpus_embeddings = torch_tensor(dataset_embed_pd.values)
|
37 |
|
38 |
-
def search(query, top_k=20, top_n = 1):
|
39 |
-
question_embedding = bi_encoder.encode(query, convert_to_tensor=True)
|
40 |
-
question_embedding = question_embedding #.cuda()
|
41 |
-
hits = util.semantic_search(question_embedding, mycorpus_embeddings, top_k=top_k)
|
42 |
-
hits = hits[0] # Get the hits for the first query
|
43 |
|
44 |
-
|
45 |
-
|
46 |
-
|
|
|
|
|
|
|
47 |
|
48 |
-
|
49 |
-
|
50 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
51 |
|
52 |
-
hits = sorted(hits, key=lambda x: x['cross-score'], reverse=True)
|
53 |
-
predictions = hits[:top_n]
|
54 |
-
return predictions
|
55 |
-
# for hit in hits[0:3]:
|
56 |
-
# print("\t{:.3f}\t{}".format(hit['cross-score'], mypassages[hit['corpus_id']].replace("\n", " ")))
|
57 |
|
58 |
def get_text(qry):
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
|
|
69 |
|
70 |
# prt_rslt("who is the best rapper in the world?")
|
71 |
|
72 |
-
|
73 |
-
|
74 |
-
def chat(message, history):
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
105 |
inputs=message
|
106 |
)
|
107 |
|
108 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
109 |
|
|
|
|
14 |
# !pip install datasets
|
15 |
|
16 |
|
17 |
+
from azure_utils import AzureVoiceData
|
18 |
+
from polly_utils import PollyVoiceData, NEURAL_ENGINE
|
19 |
+
from langchain.prompts import PromptTemplate
|
20 |
+
from openai.error import AuthenticationError, InvalidRequestError, RateLimitError
|
21 |
+
import re
|
22 |
+
import sys
|
23 |
+
from io import StringIO
|
24 |
+
from threading import Lock
|
25 |
+
from langchain.llms import OpenAI
|
26 |
+
from langchain.chains.conversation.memory import ConversationBufferMemory
|
27 |
+
from langchain.agents import tool, load_tools, initialize_agent
|
28 |
+
from langchain import ConversationChain, LLMChain
|
29 |
+
import whisper
|
30 |
+
import warnings
|
31 |
+
import boto3
|
32 |
+
import datetime
|
33 |
+
from typing import Optional, Tuple
|
34 |
+
from contextlib import closing
|
35 |
+
# Console to variable
|
36 |
+
import io
|
37 |
+
import requests
|
38 |
+
import os
|
39 |
import gradio as gr
|
40 |
from sentence_transformers import SentenceTransformer, CrossEncoder, util
|
41 |
from torch import tensor as torch_tensor
|
|
|
44 |
"""# import models"""
|
45 |
|
46 |
bi_encoder = SentenceTransformer('multi-qa-MiniLM-L6-cos-v1')
|
47 |
+
bi_encoder.max_seq_length = 256 # Truncate long passages to 256 tokens
|
48 |
|
49 |
+
# The bi-encoder will retrieve top_k documents. We use a cross-encoder, to re-rank the results list to improve the quality
|
50 |
cross_encoder = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')
|
51 |
|
52 |
"""# import datasets"""
|
|
|
57 |
dataset_embed_pd = dataset_embed.to_pandas()
|
58 |
mycorpus_embeddings = torch_tensor(dataset_embed_pd.values)
|
59 |
|
|
|
|
|
|
|
|
|
|
|
60 |
|
61 |
+
def search(query, top_k=20, top_n=1):
    """Retrieve the corpus hits most relevant to *query*.

    Retrieval happens in two stages: the bi-encoder embeds the query and
    ``util.semantic_search`` selects ``top_k`` candidates from
    ``mycorpus_embeddings``; the cross-encoder then scores every
    (query, passage) pair and the candidates are re-ordered by that score.

    Args:
        query: Question text to search for.
        top_k: Number of candidates requested from the bi-encoder stage.
        top_n: Number of re-ranked hits to return.

    Returns:
        A list of at most ``top_n`` hit dicts sorted by descending
        ``'cross-score'``. Each hit's ``'corpus_id'`` indexes ``mypassages``.
    """
    question_embedding = bi_encoder.encode(query, convert_to_tensor=True)
    # Only one query is submitted, so only the first result list is relevant.
    hits = util.semantic_search(
        question_embedding, mycorpus_embeddings, top_k=top_k)[0]

    # Re-rank: score each (query, passage) pair with the cross-encoder.
    cross_inp = [[query, mypassages[hit['corpus_id']]] for hit in hits]
    cross_scores = cross_encoder.predict(cross_inp)
    for hit, score in zip(hits, cross_scores):
        hit['cross-score'] = score

    ranked = sorted(hits, key=lambda hit: hit['cross-score'], reverse=True)
    return ranked[:top_n]
|
79 |
+
# for hit in hits[0:3]:
|
80 |
+
# print("\t{:.3f}\t{}".format(hit['cross-score'], mypassages[hit['corpus_id']].replace("\n", " ")))
|
81 |
|
|
|
|
|
|
|
|
|
|
|
82 |
|
83 |
def get_text(qry):
    """Return the passage text for every hit that ``search`` yields for *qry*.

    Args:
        qry: Question text, forwarded unchanged to ``search``.

    Returns:
        A list of strings, one per hit, looked up in ``mypassages`` through
        the hit's ``'corpus_id'``.
    """
    return ["{}".format(mypassages[found['corpus_id']])
            for found in search(qry)]
|
89 |
|
90 |
+
|
91 |
+
@tool("mysearch", return_direct=True)
def prt_rslt(query: str) -> str:
    """Search the local passage corpus for the query and return the best matching passages, one per line."""
    # NOTE: LangChain's @tool takes the docstring above as the tool
    # description shown to the agent, so keep it short and descriptive.
    # Fix: the original passed the undefined name `qry`; the parameter is `query`.
    rslt = get_text(query)
    return '\n'.join(rslt)
|
95 |
|
96 |
# prt_rslt("who is the best rapper in the world?")
|
97 |
|
98 |
+
|
99 |
+
# """# chat example"""
|
100 |
+
# def chat(message, history):
|
101 |
+
# history = history or []
|
102 |
+
# message = message.lower()
|
103 |
+
|
104 |
+
# responses = get_text(message)
|
105 |
+
# for response in responses:
|
106 |
+
# history.append((message, response))
|
107 |
+
# return history, history
|
108 |
+
|
109 |
+
|
110 |
+
# with gr.Blocks(css=CSS) as demo:
|
111 |
+
# history_state = gr.State()
|
112 |
+
# gr.Markdown('# WikiBot')
|
113 |
+
# title = 'Wikipedia Chatbot'
|
114 |
+
# description = 'chatbot with search on Wikipedia'
|
115 |
+
# with gr.Row():
|
116 |
+
# chatbot = gr.Chatbot()
|
117 |
+
# with gr.Row():
|
118 |
+
# message = gr.Textbox(label='Input your question here:',
|
119 |
+
# placeholder='How many countries are in Europe?',
|
120 |
+
# lines=1)
|
121 |
+
# submit = gr.Button(value='Send',
|
122 |
+
# variant='secondary').style(full_width=False)
|
123 |
+
# submit.click(chat,
|
124 |
+
# inputs=[message, history_state],
|
125 |
+
# outputs=[chatbot, history_state])
|
126 |
+
# gr.Examples(
|
127 |
+
# examples=["How many countries are in Europe?",
|
128 |
+
# "Was Roman Emperor Constantine I a Christian?",
|
129 |
+
# "Who is the best rapper in the world?"],
|
130 |
+
# inputs=message
|
131 |
+
# )
|
132 |
+
|
133 |
+
# demo.launch()
|
134 |
+
|
135 |
+
|
136 |
+
news_api_key = os.environ["NEWS_API_KEY"]
|
137 |
+
tmdb_bearer_token = os.environ["TMDB_BEARER_TOKEN"]
|
138 |
+
|
139 |
+
TOOLS_LIST = ['serpapi', 'wolfram-alpha', 'pal-math', 'pal-colored-objects', 'news-api', 'tmdb-api',
|
140 |
+
'open-meteo-api'] # 'google-search'
|
141 |
+
TOOLS_DEFAULT_LIST = ['mysearch', 'serpapi', 'pal-math']
|
142 |
+
BUG_FOUND_MSG = "Congratulations, you've found a bug in this application!"
|
143 |
+
AUTH_ERR_MSG = "Please paste your OpenAI key from openai.com to use this application. It is not necessary to hit a button or key after pasting it."
|
144 |
+
MAX_TOKENS = 512
|
145 |
+
TEMPERATURE = 0
|
146 |
+
|
147 |
+
LOOPING_TALKING_HEAD = "videos/humancare.mp4"
|
148 |
+
TALKING_HEAD_WIDTH = "192"
|
149 |
+
MAX_TALKING_HEAD_TEXT_LENGTH = 155
|
150 |
+
|
151 |
+
# Pertains to Express-inator functionality
|
152 |
+
NUM_WORDS_DEFAULT = 0
|
153 |
+
MAX_WORDS = 400
|
154 |
+
FORMALITY_DEFAULT = "N/A"
|
155 |
+
TEMPERATURE_DEFAULT = 0.5
|
156 |
+
EMOTION_DEFAULT = "N/A"
|
157 |
+
LANG_LEVEL_DEFAULT = "N/A"
|
158 |
+
TRANSLATE_TO_DEFAULT = "N/A"
|
159 |
+
LITERARY_STYLE_DEFAULT = "N/A"
|
160 |
+
PROMPT_TEMPLATE = PromptTemplate(
|
161 |
+
input_variables=["original_words", "num_words", "formality",
|
162 |
+
"emotions", "lang_level", "translate_to", "literary_style"],
|
163 |
+
template="Restate {num_words}{formality}{emotions}{lang_level}{translate_to}{literary_style}the following: \n{original_words}\n",
|
164 |
+
)
|
165 |
+
|
166 |
+
POLLY_VOICE_DATA = PollyVoiceData()
|
167 |
+
AZURE_VOICE_DATA = AzureVoiceData()
|
168 |
+
VOICE_GENDER = 'Female' # "Male"
|
169 |
+
|
170 |
+
# Pertains to WHISPER functionality
|
171 |
+
WHISPER_DETECT_LANG = "Detect language"
|
172 |
+
|
173 |
+
|
174 |
+
# Whisper is enabled: the "tiny" model is loaded once at import time
|
175 |
+
warnings.filterwarnings("ignore")
|
176 |
+
WHISPER_MODEL = whisper.load_model("tiny")
|
177 |
+
print("WHISPER_MODEL", WHISPER_MODEL)
|
178 |
+
|
179 |
+
|
180 |
+
# gradio settings
|
181 |
+
# css
|
182 |
+
CSS = ".gradio-container {background-color: lightgray}"
|
183 |
+
|
184 |
+
# placeholder for chat text input
|
185 |
+
PLACEHOLDER = "What is my plan benefit?"  # plain string; a trailing comma here would make it a 1-tuple
|
186 |
+
|
187 |
+
# example questions
|
188 |
+
EXAMPLES = ["How many people live in Canada?",
|
189 |
+
"What is 2 to the 30th power?",
|
190 |
+
"If x+y=10 and x-y=4, what are x and y?",
|
191 |
+
"How much did it rain in SF today?",
|
192 |
+
"Get me information about the movie 'Avatar'",
|
193 |
+
"What are the top tech headlines in the US?",
|
194 |
+
"On the desk, you see two blue booklets, two purple booklets, and two yellow pairs of sunglasses - "
|
195 |
+
"if I remove all the pairs of sunglasses from the desk, how many purple items remain on it?"]
|
196 |
+
AUTHORS = """
|
197 |
+
<p>This application, developed by Greg Hayworth, Srikanth Tangelloju, Lincoln Snyder, Michal Piekarczyk, and Xingde Jiang,
|
198 |
+
demonstrates a conversational agent implemented with OpenAI GPT-3.5 and LangChain.
|
199 |
+
When necessary, it leverages tools for complex math, searching the internet, and accessing news and weather.
|
200 |
+
Uses talking heads from <a href='https://exh.ai/'>Ex-Human</a>.
|
201 |
+
For faster inference without waiting in queue, you may duplicate the space.
|
202 |
+
</p>"""
|
203 |
+
# Whisper speech-to-text helper
|
204 |
+
|
205 |
+
|
206 |
+
def transcribe(aud_inp, whisper_lang):
    """Transcribe recorded audio to text with the module-level Whisper model.

    Args:
        aud_inp: Audio input handed to ``whisper.load_audio``; ``None`` means
            nothing was recorded.
        whisper_lang: ``WHISPER_DETECT_LANG`` to leave the language unset, or
            a language name that ``POLLY_VOICE_DATA.get_whisper_lang_code``
            maps to a Whisper language code.

    Returns:
        The decoded text, or ``""`` when there is no audio or no text.
    """
    if aud_inp is None:
        return ""

    aud = whisper.pad_or_trim(whisper.load_audio(aud_inp))
    mel = whisper.log_mel_spectrogram(aud).to(WHISPER_MODEL.device)

    # The original also called WHISPER_MODEL.detect_language(mel) here and
    # discarded the result; that unused model pass has been removed.
    if whisper_lang == WHISPER_DETECT_LANG:
        options = whisper.DecodingOptions()
    else:
        options = whisper.DecodingOptions(
            language=POLLY_VOICE_DATA.get_whisper_lang_code(whisper_lang))

    result = whisper.decode(WHISPER_MODEL, mel, options)
    print("result.text", result.text)
    return result.text if result and result.text else ""
|
224 |
+
|
225 |
+
|
226 |
+
# Pertains to Express-inator functionality
|
227 |
+
# Prompt fragment contributed by each supported literary style.
_LITERARY_STYLE_PHRASES = {
    "Prose": "as prose, ",
    "Summary": "as a summary, ",
    "Outline": "as an outline numbers and lower case letters, ",
    "Bullets": "as bullet points using bullets, ",
    "Poetry": "as a poem, ",
    "Haiku": "as a haiku, ",
    "Limerick": "as a limerick, ",
    "Joke": "as a very funny joke with a setup and punchline, ",
    "Knock-knock": "as a very funny knock-knock joke, ",
}


def transform_text(desc, express_chain, num_words, formality,
                   anticipation_level, joy_level, trust_level,
                   fear_level, surprise_level, sadness_level, disgust_level, anger_level,
                   lang_level, translate_to, literary_style):
    """Restate *desc* according to the selected "Express-inator" options.

    Each option contributes one fragment of the ``PROMPT_TEMPLATE``
    instruction ("using up to N words, ", "in a formal manner, ", ...).
    When at least one fragment is non-empty and ``express_chain`` is set,
    the chain is run to rewrite the text; otherwise the text passes through.

    Args:
        desc: Text to restate.
        express_chain: Chain run with the prompt fields, or a falsy value to
            skip the transformation.
        num_words: Word limit; falsy or 0 means no limit.
        formality: Formality label; "N/A" in any letter case disables it.
        anticipation_level, joy_level, trust_level, fear_level,
        surprise_level, sadness_level, disgust_level, anger_level:
            Emotion labels; "N/A" in any letter case disables each.
        lang_level: Language level, or ``LANG_LEVEL_DEFAULT`` for none.
        translate_to: Target language, or ``TRANSLATE_TO_DEFAULT`` for none.
        literary_style: Key of a supported style, or
            ``LITERARY_STYLE_DEFAULT`` for none; unknown styles add nothing.

    Returns:
        The resulting text with every newline doubled.
    """
    num_words_prompt = ""
    if num_words and int(num_words) != 0:
        num_words_prompt = "using up to " + str(num_words) + " words, "

    formality = formality.lower()
    formality_str = ""
    if formality != "n/a":
        formality_str = "in a " + formality + " manner, "

    # Collect the selected emotions in their fixed presentation order.
    lowered_levels = [
        level.lower()
        for level in (anticipation_level, joy_level, trust_level, fear_level,
                      surprise_level, sadness_level, disgust_level, anger_level)
    ]
    emotions = [level for level in lowered_levels if level != "n/a"]

    emotions_str = ""
    if len(emotions) == 1:
        emotions_str = "with emotion of " + emotions[0] + ", "
    elif emotions:
        emotions_str = "with emotions of " + \
            ", ".join(emotions[:-1]) + " and " + emotions[-1] + ", "

    # When translating, the language level is folded into the translation
    # fragment below instead of getting its own fragment.
    lang_level_str = ""
    if lang_level != LANG_LEVEL_DEFAULT and translate_to == TRANSLATE_TO_DEFAULT:
        lang_level_str = "at a " + lang_level + " level, "

    translate_to_str = ""
    if translate_to != TRANSLATE_TO_DEFAULT:
        # Fix: compare lang_level with its own default (the original used
        # TRANSLATE_TO_DEFAULT, which only worked because both are "N/A").
        level_prefix = "" if lang_level == LANG_LEVEL_DEFAULT else lang_level + " level "
        translate_to_str = "translated to " + level_prefix + translate_to + ", "

    literary_style_str = ""
    if literary_style != LITERARY_STYLE_DEFAULT:
        literary_style_str = _LITERARY_STYLE_PHRASES.get(literary_style, "")

    prompt_fields = {
        'original_words': desc,
        'num_words': num_words_prompt,
        'formality': formality_str,
        'emotions': emotions_str,
        'lang_level': lang_level_str,
        'translate_to': translate_to_str,
        'literary_style': literary_style_str,
    }
    formatted_prompt = PROMPT_TEMPLATE.format(**prompt_fields)

    trans_instr = num_words_prompt + formality_str + emotions_str + \
        lang_level_str + translate_to_str + literary_style_str
    if express_chain and trans_instr.strip():
        generated_text = express_chain.run(prompt_fields).strip()
    else:
        print("Not transforming text")
        generated_text = desc

    # Double every newline in the generated text.
    generated_text = generated_text.replace("\n", "\n\n")

    prompt_plus_generated = "GPT prompt: " + \
        formatted_prompt + "\n\n" + generated_text

    # NOTE(review): the fixed 5-hour offset looks like a manual shift of the
    # server clock to a local timezone; confirm before relying on it.
    print("\n==== date/time: " + str(datetime.datetime.now() -
                                     datetime.timedelta(hours=5)) + " ====")
    print("prompt_plus_generated: " + prompt_plus_generated)

    return generated_text
|
341 |
+
|
342 |
+
|
343 |
+
def load_chain(tools_list, llm):
    """Build the conversational agent and the Express-inator chain.

    Args:
        tools_list: Names of the tools the agent may use. Names of tools
            defined in this file (currently ``'mysearch'``) are allowed
            alongside LangChain's built-in tool names.
        llm: Language model shared by both chains; when falsy nothing is
            built.

    Returns:
        ``(chain, express_chain)``; both are ``None`` when ``llm`` is falsy.
    """
    chain = None
    express_chain = None
    if llm:
        print("\ntools_list", tools_list)

        # Fix: load_tools() only resolves LangChain's built-in names, so the
        # file-local tool must be filtered out and attached separately.
        custom_tools = {"mysearch": prt_rslt}
        builtin_names = [name for name in tools_list if name not in custom_tools]
        tools = load_tools(builtin_names, llm=llm, news_api_key=news_api_key,
                           tmdb_bearer_token=tmdb_bearer_token)
        tools.extend(custom_tools[name] for name in tools_list
                     if name in custom_tools)

        memory = ConversationBufferMemory(memory_key="chat_history")

        chain = initialize_agent(
            tools, llm, agent="conversational-react-description", verbose=True, memory=memory)
        express_chain = LLMChain(llm=llm, prompt=PROMPT_TEMPLATE, verbose=True)

    return chain, express_chain
|
359 |
+
|
360 |
+
|
361 |
+
def set_openai_api_key(api_key):
    """Build the LLM and both chains for a pasted OpenAI API key.

    The key is placed in ``OPENAI_API_KEY`` only while the objects are being
    constructed and is blanked again afterwards, even if construction fails.

    Args:
        api_key: Candidate key. It is accepted only if it starts with
            ``"sk-"`` and is longer than 50 characters.

    Returns:
        ``(chain, express_chain, llm)``, or ``(None, None, None)`` when the
        key is missing or does not look valid.
    """
    if not (api_key and api_key.startswith("sk-") and len(api_key) > 50):
        return None, None, None

    os.environ["OPENAI_API_KEY"] = api_key
    try:
        llm = OpenAI(temperature=TEMPERATURE, max_tokens=MAX_TOKENS)
        chain, express_chain = load_chain(TOOLS_DEFAULT_LIST, llm)
    finally:
        # Fix: blank the key on the error path too, so a failed setup never
        # leaves a user's key in the process environment.
        os.environ["OPENAI_API_KEY"] = ""
    return chain, express_chain, llm
|
372 |
+
|
373 |
+
|
374 |
+
def _run_chain_safely(chain, inp):
    """Run *chain* on *inp*; return ``(output, error_msg)``, ``error_msg`` being None on success."""
    try:
        return chain.run(input=inp), None
    except AuthenticationError:
        return "", AUTH_ERR_MSG
    except RateLimitError as rle:
        return "", "\n\nRateLimitError: " + str(rle)
    except ValueError as ve:
        return "", "\n\nValueError: " + str(ve)
    except InvalidRequestError as ire:
        return "", "\n\nInvalidRequestError: " + str(ire)
    except Exception as e:
        return "", "\n\n" + BUG_FOUND_MSG + ":\n\n" + str(e)


def run_chain(chain, inp, capture_hidden_text):
    """Run the agent chain and optionally capture its printed trace.

    Args:
        chain: Object exposing ``run(input=...)``.
        inp: User input passed to the chain.
        capture_hidden_text: When true, everything the chain prints to stdout
            is captured, cleaned up and returned as the hidden text.

    Returns:
        ``(output, hidden_text)``. Without capture, ``hidden_text`` is
        ``None`` and a failure message replaces ``output``. With capture, a
        failure message is appended to ``hidden_text`` and ``output`` is
        ``""``.
    """
    if not capture_hidden_text:
        output, error_msg = _run_chain_safely(chain, inp)
        if error_msg:
            output = error_msg
        return output, None

    hidden_text_io = StringIO()
    saved_stdout = sys.stdout
    sys.stdout = hidden_text_io
    try:
        output, error_msg = _run_chain_safely(chain, inp)
    finally:
        # Fix: restore stdout even if something escapes the handlers above
        # (for example KeyboardInterrupt), so later prints are not swallowed.
        sys.stdout = saved_stdout
    hidden_text = hidden_text_io.getvalue()

    # remove escape characters from hidden_text
    hidden_text = re.sub(r'\x1b[^m]*m', '', hidden_text)

    # remove "Entering new AgentExecutor chain..." from hidden_text
    hidden_text = re.sub(
        r"Entering new AgentExecutor chain...\n", "", hidden_text)

    # remove "Finished chain." from hidden_text
    hidden_text = re.sub(r"Finished chain.", "", hidden_text)

    # Put a blank line before each agent step marker.
    for marker in ("Thought:", "Action:", "Observation:", "Input:", "AI:"):
        hidden_text = re.sub(marker, "\n\n" + marker, hidden_text)

    if error_msg:
        hidden_text += error_msg

    print("hidden_text: ", hidden_text)
    return output, hidden_text
|
435 |
+
|
436 |
+
|
437 |
+
class ChatWrapper:
    """Callable chat handler; a lock lets only one request run at a time."""

    def __init__(self):
        self.lock = Lock()

    def __call__(
        self, api_key: str, inp: str, history: Optional[Tuple[str, str]], chain: Optional[ConversationChain],
        trace_chain: bool, speak_text: bool, talking_head: bool, monologue: bool, express_chain: Optional[LLMChain],
        num_words, formality, anticipation_level, joy_level, trust_level,
        fear_level, surprise_level, sadness_level, disgust_level, anger_level,
        lang_level, translate_to, literary_style
    ):
        """Execute the chat functionality.

        Runs the chain on ``inp`` (or echoes ``inp`` in monologue mode),
        restyles the answer with ``transform_text``, appends the exchange to
        the history and optionally produces audio and/or talking-head video.

        Args:
            api_key: OpenAI key assigned to ``openai.api_key`` before the
                chain runs.
            inp: User message.
            history: Previous (input, displayed text) pairs, or ``None``.
            chain: Agent chain; when falsy the "paste your key" message is
                used as the answer.
            trace_chain: Prepend the captured agent trace to the answer.
            speak_text: Produce spoken output.
            talking_head: Produce a talking-head video element.
            monologue: Repeat the input back instead of running the chain.
            express_chain: Chain used by ``transform_text``.
            num_words ... literary_style: Forwarded to ``transform_text``.

        Returns:
            ``(history, history, html_video, temp_file, html_audio,
            temp_aud_file, "")``.
        """
        # The context manager releases the lock on every exit path.
        with self.lock:
            print("\n==== date/time: " + str(datetime.datetime.now()) + " ====")
            print("inp: " + inp)
            print("trace_chain: ", trace_chain)
            print("speak_text: ", speak_text)
            print("talking_head: ", talking_head)
            print("monologue: ", monologue)
            history = history or []
            # If chain is None, that is because no API key was provided.
            output = AUTH_ERR_MSG
            hidden_text = output

            if chain:
                # Set OpenAI key
                import openai
                openai.api_key = api_key
                if not monologue:
                    output, hidden_text = run_chain(
                        chain, inp, capture_hidden_text=trace_chain)
                else:
                    output, hidden_text = inp, None

            output = transform_text(output, express_chain, num_words, formality, anticipation_level, joy_level,
                                    trust_level,
                                    fear_level, surprise_level, sadness_level, disgust_level, anger_level,
                                    lang_level, translate_to, literary_style)

            text_to_display = output
            # Fix: in monologue mode there is no trace (hidden_text is None),
            # and the original `None + str` concatenation raised TypeError.
            if trace_chain and hidden_text is not None:
                text_to_display = hidden_text + "\n\n" + output
            history.append((inp, text_to_display))

            html_video, temp_file, html_audio, temp_aud_file = None, None, None, None
            if speak_text and talking_head and len(output) <= MAX_TALKING_HEAD_TEXT_LENGTH:
                # Short enough for a lip-synced video that carries its own audio.
                html_video, temp_file = do_html_video_speak(
                    output, translate_to)
            else:
                if talking_head:
                    # Otherwise show the looping idle head.
                    temp_file = LOOPING_TALKING_HEAD
                    html_video = create_html_video(
                        temp_file, TALKING_HEAD_WIDTH)
                if speak_text:
                    html_audio, temp_aud_file = do_html_audio_speak(
                        output, translate_to)

        return history, history, html_video, temp_file, html_audio, temp_aud_file, ""
|
514 |
+
# return history, history, html_audio, temp_aud_file, ""
|
515 |
+
|
516 |
+
|
517 |
+
chat = ChatWrapper()
|
518 |
+
|
519 |
+
|
520 |
+
def do_html_audio_speak(words_to_speak, polly_language):
    """Synthesize *words_to_speak* with Amazon Polly and wrap it in an audio tag.

    The MP3 stream is written to ``audios/tempfile.mp3`` and served through a
    ``gr.File``.

    Args:
        words_to_speak: Text to synthesize.
        polly_language: Language passed to ``POLLY_VOICE_DATA.get_voice``; if
            no voice is found, the "Joanna" en-US neural voice is used.

    Returns:
        ``(html_audio, "audios/tempfile.mp3")`` on success, or
        ``(None, None)`` if the response has no audio stream or the file
        cannot be written.
    """
    session = boto3.Session(
        aws_access_key_id=os.environ["AWS_ACCESS_KEY_ID"],
        aws_secret_access_key=os.environ["AWS_SECRET_ACCESS_KEY"],
        region_name=os.environ["AWS_DEFAULT_REGION"]
    )
    polly_client = session.client('polly')

    voice_id, language_code, engine = POLLY_VOICE_DATA.get_voice(
        polly_language, VOICE_GENDER)
    if not voice_id:
        # Fall back to a known voice when the language has none.
        voice_id, language_code, engine = "Joanna", "en-US", NEURAL_ENGINE

    response = polly_client.synthesize_speech(
        Text=words_to_speak,
        OutputFormat='mp3',
        VoiceId=voice_id,
        LanguageCode=language_code,
        Engine=engine
    )

    if "AudioStream" not in response:
        # The response didn't contain audio data, exit gracefully
        print("Could not stream audio")
        return None, None

    with closing(response["AudioStream"]) as stream:
        try:
            with open('audios/tempfile.mp3', 'wb') as mp3_out:
                mp3_out.write(stream.read())
            served_file = gr.File("audios/tempfile.mp3")
            audio_url = "/file=" + served_file.value['name']
            html_audio = f'<audio autoplay><source src={audio_url} type="audio/mp3"></audio>'
        except IOError as error:
            # Could not write to file, exit gracefully
            print(error)
            return None, None

    return html_audio, "audios/tempfile.mp3"
|
565 |
+
|
566 |
+
|
567 |
+
def create_html_video(file_name, width):
    """Return HTML for a square, muted, looping, autoplaying video element.

    NOTE(review): ``file_name`` is never used. The source URL comes from
    ``tmp_file``, which is not defined in this function; presumably it is a
    module-level ``gr.File`` created elsewhere in the file. Confirm that,
    otherwise this raises NameError.

    Args:
        file_name: Currently ignored.
        width: Value used for both the ``width`` and ``height`` attributes.
    """
    source_url = "/file=" + tmp_file.value['name']
    return f'<video width={width} height={width} autoplay muted loop><source src={source_url} type="video/mp4" poster="humancare.jpg"></video>'
|
571 |
+
|
572 |
+
|
573 |
+
def do_html_video_speak(words_to_speak, azure_language):
    """Request a lip-synced talking-head video for *words_to_speak*.

    Posts the text to the Ex-Human lipsync endpoint, writes the returned
    bytes to ``videos/tempfile.mp4`` and wraps the file in a video tag.

    Args:
        words_to_speak: Text the talking head should say.
        azure_language: Language passed to ``AZURE_VOICE_DATA.get_voice``; if
            no voice is found, "en-US-ChristopherNeural" is used.

    Returns:
        ``(html_video, "videos/tempfile.mp4")``. ``html_video`` is
        ``'<pre>no video</pre>'`` when the request failed or returned no
        data; the file is left untouched in that case.
    """
    azure_voice = AZURE_VOICE_DATA.get_voice(azure_language, VOICE_GENDER)
    if not azure_voice:
        azure_voice = "en-US-ChristopherNeural"

    headers = {"Authorization": f"Bearer {os.environ['EXHUMAN_API_KEY']}"}
    body = {
        'bot_name': 'humancare',
        'bot_response': words_to_speak,
        'azure_voice': azure_voice,
        'azure_style': 'friendly',
        'animation_pipeline': 'high_speed',
    }
    api_endpoint = "https://api.exh.ai/animations/v1/generate_lipsync"
    res = requests.post(api_endpoint, json=body, headers=headers)
    print("res.status_code: ", res.status_code)

    html_video = '<pre>no video</pre>'
    # Fix: only treat the payload as a video when the request succeeded.
    # The original checked isinstance(res.content, bytes), which is always
    # true, so HTTP error bodies were written to the .mp4 file.
    if res.ok and res.content:
        print("len(res.content)): ", len(res.content))

        with open('videos/tempfile.mp4', 'wb') as f:
            f.write(res.content)
        temp_file = gr.File("videos/tempfile.mp4")
        temp_file_url = "/file=" + temp_file.value['name']
        html_video = f'<video width={TALKING_HEAD_WIDTH} height={TALKING_HEAD_WIDTH} autoplay><source src={temp_file_url} type="video/mp4" poster="humancare.jpg"></video>'
    else:
        print('video url unknown')
    return html_video, "videos/tempfile.mp4"
|
603 |
+
|
604 |
+
|
605 |
+
def update_selected_tools(widget, state, llm):
    """Rebuild both chains for the current tool selection.

    Args:
        widget: Newly selected tool names; when falsy the previous ``state``
            is kept.
        state: Previously stored tool names.
        llm: Language model handed to ``load_chain``.

    Returns:
        ``(tools, llm, chain, express_chain)``.
    """
    selected = widget if widget else state
    chain, express_chain = load_chain(selected, llm)
    return selected, llm, chain, express_chain
|
610 |
+
|
611 |
+
|
612 |
+
def update_talking_head(widget, state):
    """Switch the talking-head display on or off.

    Args:
        widget: New widget value; truthy turns the talking head on.
        state: Previous state (unused once ``widget`` is truthy, dropped
            otherwise).

    Returns:
        ``(widget, looping-video HTML)`` when ``widget`` is truthy, else
        ``(None, "<pre></pre>")``.
    """
    if not widget:
        # return state, create_html_video(LOOPING_TALKING_HEAD, "32")
        return None, "<pre></pre>"

    return widget, create_html_video(LOOPING_TALKING_HEAD, TALKING_HEAD_WIDTH)
|
622 |
+
|
623 |
+
|
624 |
+
def update_foo(widget, state):
    """Return the component's current value as the new stored state.

    Generic ``change`` handler wired as ``inputs=[component, state]`` and
    ``outputs=[state]`` for the checkboxes, radios and the slider.

    Args:
        widget: Current value of the component that fired the event.
        state: Previously stored value. Unused; kept so the existing
            ``inputs=[component, state]`` wirings keep working.

    Returns:
        ``widget`` unchanged, including falsy values such as ``False`` or ``0``.
    """
    # Falsy values are real settings (an unchecked box, a slider at 0), so
    # they must be stored instead of being dropped or replaced by None.
    return widget
|
628 |
+
|
629 |
+
|
630 |
+
with gr.Blocks(css=CSS) as block:
|
631 |
+
llm_state = gr.State()
|
632 |
+
history_state = gr.State()
|
633 |
+
chain_state = gr.State()
|
634 |
+
express_chain_state = gr.State()
|
635 |
+
tools_list_state = gr.State(TOOLS_DEFAULT_LIST)
|
636 |
+
trace_chain_state = gr.State(False)
|
637 |
+
speak_text_state = gr.State(False)
|
638 |
+
talking_head_state = gr.State(True)
|
639 |
+
# Takes the input and repeats it back to the user, optionally transforming it.
|
640 |
+
monologue_state = gr.State(False)
|
641 |
+
|
642 |
+
# Pertains to Express-inator functionality
|
643 |
+
num_words_state = gr.State(NUM_WORDS_DEFAULT)
|
644 |
+
formality_state = gr.State(FORMALITY_DEFAULT)
|
645 |
+
anticipation_level_state = gr.State(EMOTION_DEFAULT)
|
646 |
+
joy_level_state = gr.State(EMOTION_DEFAULT)
|
647 |
+
trust_level_state = gr.State(EMOTION_DEFAULT)
|
648 |
+
fear_level_state = gr.State(EMOTION_DEFAULT)
|
649 |
+
surprise_level_state = gr.State(EMOTION_DEFAULT)
|
650 |
+
sadness_level_state = gr.State(EMOTION_DEFAULT)
|
651 |
+
disgust_level_state = gr.State(EMOTION_DEFAULT)
|
652 |
+
anger_level_state = gr.State(EMOTION_DEFAULT)
|
653 |
+
lang_level_state = gr.State(LANG_LEVEL_DEFAULT)
|
654 |
+
translate_to_state = gr.State(TRANSLATE_TO_DEFAULT)
|
655 |
+
literary_style_state = gr.State(LITERARY_STYLE_DEFAULT)
|
656 |
+
|
657 |
+
# Pertains to WHISPER functionality
|
658 |
+
whisper_lang_state = gr.State(WHISPER_DETECT_LANG)
|
659 |
+
|
660 |
+
with gr.Tab("Chat"):
|
661 |
+
with gr.Row():
|
662 |
+
# with gr.Column():
|
663 |
+
# gr.HTML(
|
664 |
+
# """<b><center>GPT + WolframAlpha + Whisper</center></b>
|
665 |
+
# <p><center>New feature in <b>Translate to</b>: Choose <b>Language level</b> (e.g. for conversation practice or explain like I'm five)</center></p>""")
|
666 |
+
|
667 |
+
openai_api_key_textbox = gr.Textbox(placeholder="Paste your OpenAI API key (sk-...)",
|
668 |
+
show_label=False, lines=1, type='password')
|
669 |
+
|
670 |
+
with gr.Row():
|
671 |
+
with gr.Column(scale=1, min_width=TALKING_HEAD_WIDTH, visible=True):
|
672 |
+
speak_text_cb = gr.Checkbox(label="Enable speech", value=False)
|
673 |
+
speak_text_cb.change(update_foo, inputs=[speak_text_cb, speak_text_state],
|
674 |
+
outputs=[speak_text_state])
|
675 |
+
|
676 |
+
my_file = gr.File(label="Upload a file",
|
677 |
+
type="file", visible=False)
|
678 |
+
tmp_file = gr.File(LOOPING_TALKING_HEAD, visible=False)
|
679 |
+
# tmp_file_url = "/file=" + tmp_file.value['name']
|
680 |
+
htm_video = create_html_video(
|
681 |
+
LOOPING_TALKING_HEAD, TALKING_HEAD_WIDTH)
|
682 |
+
video_html = gr.HTML(htm_video)
|
683 |
+
|
684 |
+
# my_aud_file = gr.File(label="Audio file", type="file", visible=True)
|
685 |
+
tmp_aud_file = gr.File("audios/tempfile.mp3", visible=False)
|
686 |
+
tmp_aud_file_url = "/file=" + tmp_aud_file.value['name']
|
687 |
+
htm_audio = f'<audio><source src={tmp_aud_file_url} type="audio/mp3"></audio>'
|
688 |
+
audio_html = gr.HTML(htm_audio)
|
689 |
+
|
690 |
+
with gr.Column(scale=7):
|
691 |
+
chatbot = gr.Chatbot()
|
692 |
+
|
693 |
+
with gr.Row():
|
694 |
+
message = gr.Textbox(label="What's on your mind??",
|
695 |
+
placeholder=PLACEHOLDER,
|
696 |
+
lines=1)
|
697 |
+
submit = gr.Button(value="Send", variant="secondary").style(
|
698 |
+
full_width=False)
|
699 |
+
|
700 |
+
# Whisper speech-to-text: transcribe microphone input into the message box
|
701 |
+
with gr.Row():
|
702 |
+
audio_comp = gr.Microphone(source="microphone", type="filepath", label="Just say it!",
|
703 |
+
interactive=True, streaming=False)
|
704 |
+
audio_comp.change(transcribe, inputs=[
|
705 |
+
audio_comp, whisper_lang_state], outputs=[message])
|
706 |
+
|
707 |
+
gr.Examples(
|
708 |
+
examples=EXAMPLES,
|
709 |
inputs=message
|
710 |
)
|
711 |
|
712 |
+
with gr.Tab("Settings"):
|
713 |
+
tools_cb_group = gr.CheckboxGroup(label="Tools:", choices=TOOLS_LIST,
|
714 |
+
value=TOOLS_DEFAULT_LIST)
|
715 |
+
tools_cb_group.change(update_selected_tools,
|
716 |
+
inputs=[tools_cb_group,
|
717 |
+
tools_list_state, llm_state],
|
718 |
+
outputs=[tools_list_state, llm_state, chain_state, express_chain_state])
|
719 |
+
|
720 |
+
trace_chain_cb = gr.Checkbox(
|
721 |
+
label="Show reasoning chain in chat bubble", value=False)
|
722 |
+
trace_chain_cb.change(update_foo, inputs=[trace_chain_cb, trace_chain_state],
|
723 |
+
outputs=[trace_chain_state])
|
724 |
+
|
725 |
+
# speak_text_cb = gr.Checkbox(label="Speak text from agent", value=False)
|
726 |
+
# speak_text_cb.change(update_foo, inputs=[speak_text_cb, speak_text_state],
|
727 |
+
# outputs=[speak_text_state])
|
728 |
+
|
729 |
+
talking_head_cb = gr.Checkbox(label="Show talking head", value=True)
|
730 |
+
talking_head_cb.change(update_talking_head, inputs=[talking_head_cb, talking_head_state],
|
731 |
+
outputs=[talking_head_state, video_html])
|
732 |
+
|
733 |
+
monologue_cb = gr.Checkbox(label="Babel fish mode (translate/restate what you enter, no conversational agent)",
|
734 |
+
value=False)
|
735 |
+
monologue_cb.change(update_foo, inputs=[monologue_cb, monologue_state],
|
736 |
+
outputs=[monologue_state])
|
737 |
+
|
738 |
+
with gr.Tab("Whisper STT"):
|
739 |
+
whisper_lang_radio = gr.Radio(label="Whisper speech-to-text language:", choices=[
|
740 |
+
WHISPER_DETECT_LANG, "Arabic", "Arabic (Gulf)", "Catalan", "Chinese (Cantonese)", "Chinese (Mandarin)",
|
741 |
+
"Danish", "Dutch", "English (Australian)", "English (British)", "English (Indian)", "English (New Zealand)",
|
742 |
+
"English (South African)", "English (US)", "English (Welsh)", "Finnish", "French", "French (Canadian)",
|
743 |
+
"German", "German (Austrian)", "Georgian", "Hindi", "Icelandic", "Indonesian", "Italian", "Japanese",
|
744 |
+
"Korean", "Norwegian", "Polish",
|
745 |
+
"Portuguese (Brazilian)", "Portuguese (European)", "Romanian", "Russian", "Spanish (European)",
|
746 |
+
"Spanish (Mexican)", "Spanish (US)", "Swedish", "Turkish", "Ukrainian", "Welsh"],
|
747 |
+
value=WHISPER_DETECT_LANG)
|
748 |
+
|
749 |
+
whisper_lang_radio.change(update_foo,
|
750 |
+
inputs=[whisper_lang_radio,
|
751 |
+
whisper_lang_state],
|
752 |
+
outputs=[whisper_lang_state])
|
753 |
+
|
754 |
+
with gr.Tab("Translate to"):
|
755 |
+
lang_level_radio = gr.Radio(label="Language level:", choices=[
|
756 |
+
LANG_LEVEL_DEFAULT, "1st grade", "2nd grade", "3rd grade", "4th grade", "5th grade", "6th grade",
|
757 |
+
"7th grade", "8th grade", "9th grade", "10th grade", "11th grade", "12th grade", "University"],
|
758 |
+
value=LANG_LEVEL_DEFAULT)
|
759 |
+
lang_level_radio.change(update_foo, inputs=[lang_level_radio, lang_level_state],
|
760 |
+
outputs=[lang_level_state])
|
761 |
+
|
762 |
+
translate_to_radio = gr.Radio(label="Language:", choices=[
|
763 |
+
TRANSLATE_TO_DEFAULT, "Arabic", "Arabic (Gulf)", "Catalan", "Chinese (Cantonese)", "Chinese (Mandarin)",
|
764 |
+
"Danish", "Dutch", "English (Australian)", "English (British)", "English (Indian)", "English (New Zealand)",
|
765 |
+
"English (South African)", "English (US)", "English (Welsh)", "Finnish", "French", "French (Canadian)",
|
766 |
+
"German", "German (Austrian)", "Georgian", "Hindi", "Icelandic", "Indonesian", "Italian", "Japanese",
|
767 |
+
"Korean", "Norwegian", "Polish",
|
768 |
+
"Portuguese (Brazilian)", "Portuguese (European)", "Romanian", "Russian", "Spanish (European)",
|
769 |
+
"Spanish (Mexican)", "Spanish (US)", "Swedish", "Turkish", "Ukrainian", "Welsh",
|
770 |
+
"emojis", "Gen Z slang", "how the stereotypical Karen would say it", "Klingon",
|
771 |
+
"Pirate", "Strange Planet expospeak technical talk", "Yoda"],
|
772 |
+
value=TRANSLATE_TO_DEFAULT)
|
773 |
+
|
774 |
+
translate_to_radio.change(update_foo,
|
775 |
+
inputs=[translate_to_radio,
|
776 |
+
translate_to_state],
|
777 |
+
outputs=[translate_to_state])
|
778 |
+
|
779 |
+
with gr.Tab("Formality"):
|
780 |
+
formality_radio = gr.Radio(label="Formality:",
|
781 |
+
choices=[FORMALITY_DEFAULT,
|
782 |
+
"Casual", "Polite", "Honorific"],
|
783 |
+
value=FORMALITY_DEFAULT)
|
784 |
+
formality_radio.change(update_foo,
|
785 |
+
inputs=[formality_radio, formality_state],
|
786 |
+
outputs=[formality_state])
|
787 |
+
|
788 |
+
with gr.Tab("Lit style"):
|
789 |
+
literary_style_radio = gr.Radio(label="Literary style:", choices=[
|
790 |
+
LITERARY_STYLE_DEFAULT, "Prose", "Summary", "Outline", "Bullets", "Poetry", "Haiku", "Limerick", "Joke",
|
791 |
+
"Knock-knock"],
|
792 |
+
value=LITERARY_STYLE_DEFAULT)
|
793 |
+
|
794 |
+
literary_style_radio.change(update_foo,
|
795 |
+
inputs=[literary_style_radio,
|
796 |
+
literary_style_state],
|
797 |
+
outputs=[literary_style_state])
|
798 |
+
|
799 |
+
with gr.Tab("Emotions"):
|
800 |
+
anticipation_level_radio = gr.Radio(label="Anticipation level:",
|
801 |
+
choices=[
|
802 |
+
EMOTION_DEFAULT, "Interest", "Anticipation", "Vigilance"],
|
803 |
+
value=EMOTION_DEFAULT)
|
804 |
+
anticipation_level_radio.change(update_foo,
|
805 |
+
inputs=[anticipation_level_radio,
|
806 |
+
anticipation_level_state],
|
807 |
+
outputs=[anticipation_level_state])
|
808 |
+
|
809 |
+
joy_level_radio = gr.Radio(label="Joy level:",
|
810 |
+
choices=[EMOTION_DEFAULT,
|
811 |
+
"Serenity", "Joy", "Ecstasy"],
|
812 |
+
value=EMOTION_DEFAULT)
|
813 |
+
joy_level_radio.change(update_foo,
|
814 |
+
inputs=[joy_level_radio, joy_level_state],
|
815 |
+
outputs=[joy_level_state])
|
816 |
+
|
817 |
+
trust_level_radio = gr.Radio(label="Trust level:",
|
818 |
+
choices=[
|
819 |
+
EMOTION_DEFAULT, "Acceptance", "Trust", "Admiration"],
|
820 |
+
value=EMOTION_DEFAULT)
|
821 |
+
trust_level_radio.change(update_foo,
|
822 |
+
inputs=[trust_level_radio, trust_level_state],
|
823 |
+
outputs=[trust_level_state])
|
824 |
+
|
825 |
+
fear_level_radio = gr.Radio(label="Fear level:",
|
826 |
+
choices=[EMOTION_DEFAULT,
|
827 |
+
"Apprehension", "Fear", "Terror"],
|
828 |
+
value=EMOTION_DEFAULT)
|
829 |
+
fear_level_radio.change(update_foo,
|
830 |
+
inputs=[fear_level_radio, fear_level_state],
|
831 |
+
outputs=[fear_level_state])
|
832 |
+
|
833 |
+
surprise_level_radio = gr.Radio(label="Surprise level:",
|
834 |
+
choices=[
|
835 |
+
EMOTION_DEFAULT, "Distraction", "Surprise", "Amazement"],
|
836 |
+
value=EMOTION_DEFAULT)
|
837 |
+
surprise_level_radio.change(update_foo,
|
838 |
+
inputs=[surprise_level_radio,
|
839 |
+
surprise_level_state],
|
840 |
+
outputs=[surprise_level_state])
|
841 |
+
|
842 |
+
sadness_level_radio = gr.Radio(label="Sadness level:",
|
843 |
+
choices=[
|
844 |
+
EMOTION_DEFAULT, "Pensiveness", "Sadness", "Grief"],
|
845 |
+
value=EMOTION_DEFAULT)
|
846 |
+
sadness_level_radio.change(update_foo,
|
847 |
+
inputs=[sadness_level_radio,
|
848 |
+
sadness_level_state],
|
849 |
+
outputs=[sadness_level_state])
|
850 |
+
|
851 |
+
disgust_level_radio = gr.Radio(label="Disgust level:",
|
852 |
+
choices=[EMOTION_DEFAULT,
|
853 |
+
"Boredom", "Disgust", "Loathing"],
|
854 |
+
value=EMOTION_DEFAULT)
|
855 |
+
disgust_level_radio.change(update_foo,
|
856 |
+
inputs=[disgust_level_radio,
|
857 |
+
disgust_level_state],
|
858 |
+
outputs=[disgust_level_state])
|
859 |
+
|
860 |
+
anger_level_radio = gr.Radio(label="Anger level:",
|
861 |
+
choices=[EMOTION_DEFAULT,
|
862 |
+
"Annoyance", "Anger", "Rage"],
|
863 |
+
value=EMOTION_DEFAULT)
|
864 |
+
anger_level_radio.change(update_foo,
|
865 |
+
inputs=[anger_level_radio, anger_level_state],
|
866 |
+
outputs=[anger_level_state])
|
867 |
+
|
868 |
+
with gr.Tab("Max words"):
|
869 |
+
num_words_slider = gr.Slider(label="Max number of words to generate (0 for don't care)",
|
870 |
+
value=NUM_WORDS_DEFAULT, minimum=0, maximum=MAX_WORDS, step=10)
|
871 |
+
num_words_slider.change(update_foo,
|
872 |
+
inputs=[num_words_slider, num_words_state],
|
873 |
+
outputs=[num_words_state])
|
874 |
+
|
875 |
+
gr.HTML(AUTHORS)
|
876 |
+
|
877 |
+
# gr.HTML("""
|
878 |
+
# <form action="https://www.paypal.com/donate" method="post" target="_blank">
|
879 |
+
# <input type="hidden" name="business" value="AK8BVNALBXSPQ" />
|
880 |
+
# <input type="hidden" name="no_recurring" value="0" />
|
881 |
+
# <input type="hidden" name="item_name" value="Please consider helping to defray the cost of APIs such as SerpAPI and WolframAlpha that this app uses." />
|
882 |
+
# <input type="hidden" name="currency_code" value="USD" />
|
883 |
+
# <input type="image" src="https://www.paypalobjects.com/en_US/i/btn/btn_donate_LG.gif" border="0" name="submit" title="PayPal - The safer, easier way to pay online!" alt="Donate with PayPal button" />
|
884 |
+
# <img alt="" border="0" src="https://www.paypal.com/en_US/i/scr/pixel.gif" width="1" height="1" />
|
885 |
+
# </form>
|
886 |
+
# """)
|
887 |
+
|
888 |
+
gr.HTML("""<center>
|
889 |
+
<a href="https://huggingface.co/spaces/gfhayworth/hack_qa?duplicate=true">
|
890 |
+
<img style="margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
|
891 |
+
Powered by <a href='https://github.com/hwchase17/langchain'>LangChain 🦜️🔗</a>
|
892 |
+
</center>""")
|
893 |
+
|
894 |
+
message.submit(chat, inputs=[openai_api_key_textbox, message, history_state, chain_state, trace_chain_state,
|
895 |
+
speak_text_state, talking_head_state, monologue_state,
|
896 |
+
express_chain_state, num_words_state, formality_state,
|
897 |
+
anticipation_level_state, joy_level_state, trust_level_state, fear_level_state,
|
898 |
+
surprise_level_state, sadness_level_state, disgust_level_state, anger_level_state,
|
899 |
+
lang_level_state, translate_to_state, literary_style_state],
|
900 |
+
outputs=[chatbot, history_state, video_html, my_file, audio_html, tmp_aud_file, message])
|
901 |
+
# outputs=[chatbot, history_state, audio_html, tmp_aud_file, message])
|
902 |
+
|
903 |
+
submit.click(chat, inputs=[openai_api_key_textbox, message, history_state, chain_state, trace_chain_state,
|
904 |
+
speak_text_state, talking_head_state, monologue_state,
|
905 |
+
express_chain_state, num_words_state, formality_state,
|
906 |
+
anticipation_level_state, joy_level_state, trust_level_state, fear_level_state,
|
907 |
+
surprise_level_state, sadness_level_state, disgust_level_state, anger_level_state,
|
908 |
+
lang_level_state, translate_to_state, literary_style_state],
|
909 |
+
outputs=[chatbot, history_state, video_html, my_file, audio_html, tmp_aud_file, message])
|
910 |
+
# outputs=[chatbot, history_state, audio_html, tmp_aud_file, message])
|
911 |
+
|
912 |
+
openai_api_key_textbox.change(set_openai_api_key,
|
913 |
+
inputs=[openai_api_key_textbox],
|
914 |
+
outputs=[chain_state, express_chain_state, llm_state])
|
915 |
|
916 |
+
block.launch(debug=True)
|
audios/tempfile.mp3
ADDED
Binary file (785 kB). View file
|
|
azure_utils.py
ADDED
@@ -0,0 +1,155 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# This class stores Azure voice data. Specifically, the class stores several records containing
|
2 |
+
# language, gender and azure_voice. The class also has a method to return the
|
3 |
+
# azure_voice given a language and gender (None when no record matches).
|
4 |
+
|
5 |
+
NEURAL_ENGINE = "neural"
|
6 |
+
STANDARD_ENGINE = "standard"
|
7 |
+
|
8 |
+
|
9 |
+
class AzureVoiceData:
    """Lookup table mapping a language name and gender to an Azure voice name.

    ``voice_data`` is a list of dicts with the keys ``language``,
    ``azure_voice`` and ``gender``. Every record currently describes a male
    voice, so lookups for any other gender return ``None``.
    """

    def __init__(self):
        self.voice_data = [
            {'language': 'Arabic', 'azure_voice': 'ar-EG-ShakirNeural', 'gender': 'Male'},
            {'language': 'Arabic (Gulf)', 'azure_voice': 'ar-KW-FahedNeural', 'gender': 'Male'},
            {'language': 'Catalan', 'azure_voice': 'ca-ES-EnricNeural', 'gender': 'Male'},
            {'language': 'Chinese (Cantonese)', 'azure_voice': 'yue-CN-YunSongNeural', 'gender': 'Male'},
            {'language': 'Chinese (Mandarin)', 'azure_voice': 'zh-CN-YunxiNeural', 'gender': 'Male'},
            {'language': 'Danish', 'azure_voice': 'da-DK-JeppeNeural', 'gender': 'Male'},
            {'language': 'Dutch', 'azure_voice': 'nl-NL-MaartenNeural', 'gender': 'Male'},
            {'language': 'English (Australian)', 'azure_voice': 'en-AU-KenNeural', 'gender': 'Male'},
            {'language': 'English (British)', 'azure_voice': 'en-GB-RyanNeural', 'gender': 'Male'},
            {'language': 'English (Indian)', 'azure_voice': 'en-IN-PrabhatNeural', 'gender': 'Male'},
            {'language': 'English (New Zealand)', 'azure_voice': 'en-NZ-MitchellNeural', 'gender': 'Male'},
            {'language': 'English (South African)', 'azure_voice': 'en-ZA-LukeNeural', 'gender': 'Male'},
            {'language': 'English (US)', 'azure_voice': 'en-US-ChristopherNeural', 'gender': 'Male'},
            {'language': 'English (Welsh)', 'azure_voice': 'cy-GB-AledNeural', 'gender': 'Male'},
            {'language': 'Finnish', 'azure_voice': 'fi-FI-HarriNeural', 'gender': 'Male'},
            {'language': 'French', 'azure_voice': 'fr-FR-HenriNeural', 'gender': 'Male'},
            {'language': 'French (Canadian)', 'azure_voice': 'fr-CA-AntoineNeural', 'gender': 'Male'},
            {'language': 'German', 'azure_voice': 'de-DE-KlausNeural', 'gender': 'Male'},
            {'language': 'German (Austrian)', 'azure_voice': 'de-AT-JonasNeural', 'gender': 'Male'},
            {'language': 'Hindi', 'azure_voice': 'hi-IN-MadhurNeural', 'gender': 'Male'},
            {'language': 'Icelandic', 'azure_voice': 'is-IS-GunnarNeural', 'gender': 'Male'},
            {'language': 'Italian', 'azure_voice': 'it-IT-GianniNeural', 'gender': 'Male'},
            {'language': 'Japanese', 'azure_voice': 'ja-JP-KeitaNeural', 'gender': 'Male'},
            {'language': 'Korean', 'azure_voice': 'ko-KR-GookMinNeural', 'gender': 'Male'},
            {'language': 'Norwegian', 'azure_voice': 'nb-NO-FinnNeural', 'gender': 'Male'},
            {'language': 'Polish', 'azure_voice': 'pl-PL-MarekNeural', 'gender': 'Male'},
            {'language': 'Portuguese (Brazilian)', 'azure_voice': 'pt-BR-NicolauNeural', 'gender': 'Male'},
            {'language': 'Portuguese (European)', 'azure_voice': 'pt-PT-DuarteNeural', 'gender': 'Male'},
            {'language': 'Romanian', 'azure_voice': 'ro-RO-EmilNeural', 'gender': 'Male'},
            {'language': 'Russian', 'azure_voice': 'ru-RU-DmitryNeural', 'gender': 'Male'},
            {'language': 'Spanish (European)', 'azure_voice': 'es-ES-TeoNeural', 'gender': 'Male'},
            {'language': 'Spanish (Mexican)', 'azure_voice': 'es-MX-LibertoNeural', 'gender': 'Male'},
            # Fixed: the voice name previously ended with a stray '"' character.
            {'language': 'Spanish (US)', 'azure_voice': 'es-US-AlonsoNeural', 'gender': 'Male'},
            {'language': 'Swedish', 'azure_voice': 'sv-SE-MattiasNeural', 'gender': 'Male'},
            {'language': 'Turkish', 'azure_voice': 'tr-TR-AhmetNeural', 'gender': 'Male'},
            {'language': 'Welsh', 'azure_voice': 'cy-GB-AledNeural', 'gender': 'Male'},
        ]

    def get_voice(self, language, gender):
        """Return the Azure voice name for ``language`` and ``gender``.

        Args:
            language: Language name exactly as stored in ``voice_data``
                (for example ``'English (US)'``).
            gender: Gender label exactly as stored in ``voice_data``
                (for example ``'Male'``).

        Returns:
            The ``azure_voice`` string of the first matching record, or
            ``None`` when no record matches both values.
        """
        return next(
            (voice['azure_voice'] for voice in self.voice_data
             if voice['language'] == language and voice['gender'] == gender),
            None,
        )
|
127 |
+
|
128 |
+
|
129 |
+
# Run from the command-line
|
130 |
+
if __name__ == '__main__':
    # Smoke test: print the voice found for a few language/gender pairs.
    azure_voice_data = AzureVoiceData()

    for language, gender in (
            ('English (US)', 'Male'),
            ('English (US)', 'Female'),
            ('French', 'Female'),
            ('French', 'Male'),
            ('Japanese', 'Female'),
            ('Japanese', 'Male'),
            ('Hindi', 'Female'),
            ('Hindi', 'Male'),
    ):
        azure_voice = azure_voice_data.get_voice(language, gender)
        print(language, gender, azure_voice)
|
images/humancare.jpg
ADDED
![]() |
polly_utils.py
ADDED
@@ -0,0 +1,635 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# This module stores Polly voice data. Each record holds a language, lang_code,
# whisper_lang_code, voice_id, gender and two 'Yes'/'No' flags saying whether the
# voice is listed for the neural and/or the standard engine. PollyVoiceData
# offers lookups of (voice_id, lang_code, engine) by language and gender, and of
# the whisper_lang_code by language.

NEURAL_ENGINE = "neural"
STANDARD_ENGINE = "standard"

# Keys of every voice record, in the order the values appear in _VOICE_ROWS.
_VOICE_FIELDS = (
    'language',
    'lang_code',
    'whisper_lang_code',
    'voice_id',
    'gender',
    'neural',
    'standard',
)

# One row per voice. Row order matters: lookups return the first match.
_VOICE_ROWS = (
    ('Arabic', 'arb', 'ar', 'Zeina', 'Female', 'No', 'Yes'),
    ('Arabic (Gulf)', 'ar-AE', 'ar', 'Hala', 'Female', 'Yes', 'No'),
    ('Catalan', 'ca-ES', 'ca', 'Arlet', 'Female', 'Yes', 'No'),
    ('Chinese (Cantonese)', 'yue-CN', 'zh', 'Hiujin', 'Female', 'Yes', 'No'),
    ('Chinese (Mandarin)', 'cmn-CN', 'zh', 'Zhiyu', 'Female', 'Yes', 'No'),
    ('Danish', 'da-DK', 'da', 'Naja', 'Female', 'No', 'Yes'),
    ('Danish', 'da-DK', 'da', 'Mads', 'Male', 'No', 'Yes'),
    ('Dutch', 'nl-NL', 'nl', 'Laura', 'Female', 'Yes', 'No'),
    ('Dutch', 'nl-NL', 'nl', 'Lotte', 'Female', 'No', 'Yes'),
    ('Dutch', 'nl-NL', 'nl', 'Ruben', 'Male', 'No', 'Yes'),
    ('English (Australian)', 'en-AU', 'en', 'Nicole', 'Female', 'No', 'Yes'),
    ('English (Australian)', 'en-AU', 'en', 'Olivia', 'Female', 'Yes', 'No'),
    ('English (Australian)', 'en-AU', 'en', 'Russell', 'Male', 'No', 'Yes'),
    ('English (British)', 'en-GB', 'en', 'Amy', 'Female', 'Yes', 'Yes'),
    ('English (British)', 'en-GB', 'en', 'Emma', 'Female', 'Yes', 'Yes'),
    ('English (British)', 'en-GB', 'en', 'Brian', 'Male', 'Yes', 'Yes'),
    ('English (British)', 'en-GB', 'en', 'Arthur', 'Male', 'Yes', 'No'),
    ('English (Indian)', 'en-IN', 'en', 'Aditi', 'Female', 'No', 'Yes'),
    ('English (Indian)', 'en-IN', 'en', 'Raveena', 'Female', 'No', 'Yes'),
    ('English (Indian)', 'en-IN', 'en', 'Kajal', 'Female', 'Yes', 'No'),
    ('English (New Zealand)', 'en-NZ', 'en', 'Aria', 'Female', 'Yes', 'No'),
    ('English (South African)', 'en-ZA', 'en', 'Ayanda', 'Female', 'Yes', 'No'),
    ('English (US)', 'en-US', 'en', 'Ivy', 'Female (child)', 'Yes', 'Yes'),
    ('English (US)', 'en-US', 'en', 'Joanna', 'Female', 'Yes', 'Yes'),
    ('English (US)', 'en-US', 'en', 'Kendra', 'Female', 'Yes', 'Yes'),
    ('English (US)', 'en-US', 'en', 'Kimberly', 'Female', 'Yes', 'Yes'),
    ('English (US)', 'en-US', 'en', 'Salli', 'Female', 'Yes', 'Yes'),
    ('English (US)', 'en-US', 'en', 'Joey', 'Male', 'Yes', 'Yes'),
    ('English (US)', 'en-US', 'en', 'Justin', 'Male (child)', 'Yes', 'Yes'),
    ('English (US)', 'en-US', 'en', 'Kevin', 'Male (child)', 'Yes', 'No'),
    ('English (US)', 'en-US', 'en', 'Matthew', 'Male', 'Yes', 'Yes'),
    ('English (Welsh)', 'en-GB-WLS', 'en', 'Geraint', 'Male', 'No', 'Yes'),
    ('Finnish', 'fi-FI', 'fi', 'Suvi', 'Female', 'Yes', 'No'),
    ('French', 'fr-FR', 'fr', 'Celine', 'Female', 'No', 'Yes'),
    ('French', 'fr-FR', 'fr', 'Lea', 'Female', 'Yes', 'Yes'),
    ('French', 'fr-FR', 'fr', 'Mathieu', 'Male', 'No', 'Yes'),
    ('French (Canadian)', 'fr-CA', 'fr', 'Chantal', 'Female', 'No', 'Yes'),
    ('French (Canadian)', 'fr-CA', 'fr', 'Gabrielle', 'Female', 'Yes', 'No'),
    ('French (Canadian)', 'fr-CA', 'fr', 'Liam', 'Male', 'Yes', 'No'),
    ('German', 'de-DE', 'de', 'Marlene', 'Female', 'No', 'Yes'),
    ('German', 'de-DE', 'de', 'Vicki', 'Female', 'Yes', 'Yes'),
    ('German', 'de-DE', 'de', 'Hans', 'Male', 'No', 'Yes'),
    ('German', 'de-DE', 'de', 'Daniel', 'Male', 'Yes', 'No'),
    ('German (Austrian)', 'de-AT', 'de', 'Hannah', 'Female', 'Yes', 'No'),
    ('Hindi', 'hi-IN', 'hi', 'Aditi', 'Female', 'No', 'Yes'),
    ('Hindi', 'hi-IN', 'hi', 'Kajal', 'Female', 'Yes', 'No'),
    ('Icelandic', 'is-IS', 'is', 'Dora', 'Female', 'No', 'Yes'),
    ('Icelandic', 'is-IS', 'is', 'Karl', 'Male', 'No', 'Yes'),
    ('Italian', 'it-IT', 'it', 'Carla', 'Female', 'No', 'Yes'),
    ('Italian', 'it-IT', 'it', 'Bianca', 'Female', 'Yes', 'Yes'),
    ('Japanese', 'ja-JP', 'ja', 'Mizuki', 'Female', 'No', 'Yes'),
    ('Japanese', 'ja-JP', 'ja', 'Takumi', 'Male', 'Yes', 'Yes'),
    ('Korean', 'ko-KR', 'ko', 'Seoyeon', 'Female', 'Yes', 'Yes'),
    ('Norwegian', 'nb-NO', 'no', 'Liv', 'Female', 'No', 'Yes'),
    ('Norwegian', 'nb-NO', 'no', 'Ida', 'Female', 'Yes', 'No'),
    ('Polish', 'pl-PL', 'pl', 'Ewa', 'Female', 'No', 'Yes'),
    ('Polish', 'pl-PL', 'pl', 'Maja', 'Female', 'No', 'Yes'),
    ('Polish', 'pl-PL', 'pl', 'Jacek', 'Male', 'No', 'Yes'),
    ('Polish', 'pl-PL', 'pl', 'Jan', 'Male', 'No', 'Yes'),
    ('Polish', 'pl-PL', 'pl', 'Ola', 'Female', 'Yes', 'No'),
    ('Portuguese (Brazilian)', 'pt-BR', 'pt', 'Camila', 'Female', 'Yes', 'Yes'),
    ('Portuguese (Brazilian)', 'pt-BR', 'pt', 'Vitoria', 'Female', 'Yes', 'Yes'),
    ('Portuguese (Brazilian)', 'pt-BR', 'pt', 'Ricardo', 'Male', 'No', 'Yes'),
    ('Portuguese (European)', 'pt-PT', 'pt', 'Ines', 'Female', 'Yes', 'Yes'),
    ('Portuguese (European)', 'pt-PT', 'pt', 'Cristiano', 'Male', 'No', 'Yes'),
    ('Romanian', 'ro-RO', 'ro', 'Carmen', 'Female', 'No', 'Yes'),
    ('Russian', 'ru-RU', 'ru', 'Tatyana', 'Female', 'No', 'Yes'),
    ('Russian', 'ru-RU', 'ru', 'Maxim', 'Male', 'No', 'Yes'),
    ('Spanish (European)', 'es-ES', 'es', 'Conchita', 'Female', 'No', 'Yes'),
    ('Spanish (European)', 'es-ES', 'es', 'Lucia', 'Female', 'Yes', 'Yes'),
    ('Spanish (European)', 'es-ES', 'es', 'Enrique', 'Male', 'No', 'Yes'),
    ('Spanish (Mexican)', 'es-MX', 'es', 'Mia', 'Female', 'Yes', 'Yes'),
    ('Spanish (US)', 'es-US', 'es', 'Lupe', 'Female', 'Yes', 'Yes'),
    ('Spanish (US)', 'es-US', 'es', 'Penelope', 'Female', 'No', 'Yes'),
    ('Spanish (US)', 'es-US', 'es', 'Miguel', 'Male', 'No', 'Yes'),
    ('Spanish (US)', 'es-US', 'es', 'Pedro', 'Male', 'Yes', 'No'),
    ('Swedish', 'sv-SE', 'sv', 'Astrid', 'Female', 'No', 'Yes'),
    ('Swedish', 'sv-SE', 'sv', 'Elin', 'Female', 'Yes', 'No'),
    ('Turkish', 'tr-TR', 'tr', 'Filiz', 'Female', 'No', 'Yes'),
    ('Welsh', 'cy-GB', 'cy', 'Gwyneth', 'Female', 'No', 'Yes'),
)


class PollyVoiceData:
    """Lookup table of Polly voices keyed by language and gender.

    ``voice_data`` is a list of dicts, one per voice, each with the keys
    listed in ``_VOICE_FIELDS``. The 'neural' and 'standard' values are the
    strings 'Yes' or 'No'.
    """

    def __init__(self):
        """Build this instance's own list of voice-record dicts."""
        # Fresh dicts per instance, so mutating one instance's records
        # cannot leak into another instance or into the shared row table.
        self.voice_data = [dict(zip(_VOICE_FIELDS, row)) for row in _VOICE_ROWS]

    def get_voice(self, language, gender):
        """Return ``(voice_id, lang_code, engine)`` for a language and gender.

        Both ``language`` and ``gender`` must equal the record's values
        exactly (so 'Female' does not match 'Female (child)'). The first
        matching record flagged as neural wins and yields NEURAL_ENGINE;
        failing that, the first matching record flagged as standard yields
        STANDARD_ENGINE. Returns ``(None, None, None)`` when no record
        matches.
        """
        standard_fallback = None
        for voice in self.voice_data:
            if voice['language'] != language or voice['gender'] != gender:
                continue
            if voice['neural'] == 'Yes':
                return voice['voice_id'], voice['lang_code'], NEURAL_ENGINE
            # Remember only the earliest standard-capable match; a neural
            # voice later in the table still takes precedence over it.
            if standard_fallback is None and voice['standard'] == 'Yes':
                standard_fallback = voice
        if standard_fallback is not None:
            return (standard_fallback['voice_id'],
                    standard_fallback['lang_code'],
                    STANDARD_ENGINE)
        return None, None, None

    def get_whisper_lang_code(self, language):
        """Return the whisper_lang_code of the first record for ``language``.

        Falls back to "en" when no record has that language.
        """
        for voice in self.voice_data:
            if voice['language'] == language:
                return voice['whisper_lang_code']
        return "en"
|
590 |
+
|
591 |
+
|
592 |
+
# Demo when executed as a script: print a handful of sample lookups.
if __name__ == '__main__':
    polly_voice_data = PollyVoiceData()

    # (language, gender) pairs, including one with no matching voice.
    sample_voices = (
        ('English (US)', 'Male'),
        ('English (US)', 'Female'),
        ('French', 'Female'),
        ('French', 'Male'),
        ('Japanese', 'Female'),
        ('Japanese', 'Male'),
        ('Hindi', 'Female'),
        ('Hindi', 'Male'),
    )
    for language, gender in sample_voices:
        voice_id, language_code, engine = polly_voice_data.get_voice(language, gender)
        print(language, gender, voice_id, language_code, engine)

    # 'Foo' is not in the table and exercises the default return value.
    sample_languages = ('English (US)', 'Chinese (Mandarin)', 'Norwegian', 'Dutch', 'Foo')
    for language in sample_languages:
        whisper_lang_code = polly_voice_data.get_whisper_lang_code(language)
        print(language, 'whisper_lang_code:', whisper_lang_code)
|
634 |
+
|
635 |
+
|
requirements.txt
CHANGED
@@ -1,2 +1,11 @@
|
|
1 |
sentence-transformers
|
2 |
-
datasets
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
sentence-transformers
|
2 |
+
datasets
|
3 |
+
openai==0.26.1
|
4 |
+
gradio==3.16.2
|
5 |
+
google-search-results
|
6 |
+
google-api-python-client==2.72.0
|
7 |
+
wolframalpha
|
8 |
+
langchain==0.0.63
|
9 |
+
requests==2.28.2
|
10 |
+
git+https://github.com/openai/whisper.git
|
11 |
+
boto3
|
videos/humancare.mp4
ADDED
Binary file (235 kB). View file
|
|