Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -1,28 +1,21 @@
 import torch
-from PIL import Image
 import gradio as gr
 import spaces
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer, BitsAndBytesConfig
 import os
 from threading import Thread
 
-import pymupdf
-import docx
-from pptx import Presentation
-from_tf=True
-
-
 MODEL_LIST = ["THUDM/GLM-4-Z1-32B-0414"]
 
 HF_TOKEN = os.environ.get("HF_TOKEN", None)
 MODEL_ID = MODEL_LIST[0]
 MODEL_NAME = "GLM-4-Z1-32B-0414"
 
-TITLE = "<h1>3ML-bot</h1>"
+TITLE = "<h1>3ML-bot (Text Only)</h1>"
 
 DESCRIPTION = f"""
 <center>
-<p>😊 A Multi-
+<p>😊 A Multi-Lingual Analytical Chatbot.
 <br>
 🚀 MODEL NOW: <a href="https://hf.co/nikravan/glm-4vq">{MODEL_NAME}</a>
 </center>"""
@@ -44,58 +37,6 @@ quantization_config = BitsAndBytesConfig(
 
 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
 
-def extract_text(path):
-    return open(path, 'r').read()
-
-def extract_pdf(path):
-    doc = pymupdf.open(path)
-    text = ""
-    for page in doc:
-        text += page.get_text()
-    return text
-
-def extract_docx(path):
-    doc = docx.Document(path)
-    data = []
-    for paragraph in doc.paragraphs:
-        data.append(paragraph.text)
-    content = '\n\n'.join(data)
-    return content
-
-def extract_pptx(path):
-    prs = Presentation(path)
-    text = ""
-    for slide in prs.slides:
-        for shape in slide.shapes:
-            if hasattr(shape, "text"):
-                text += shape.text + "\n"
-    return text
-
-def mode_load(path):
-    choice = ""
-    file_type = path.split(".")[-1]
-    print(file_type)
-    if file_type in ["pdf", "txt", "py", "docx", "pptx", "json", "cpp", "md"]:
-        if file_type.endswith("pdf"):
-            content = extract_pdf(path)
-        elif file_type.endswith("docx"):
-            content = extract_docx(path)
-        elif file_type.endswith("pptx"):
-            content = extract_pptx(path)
-        else:
-            content = extract_text(path)
-        choice = "doc"
-        print(content[:100])
-        return choice, content[:5000]
-
-    elif file_type in ["png", "jpg", "jpeg", "bmp", "tiff", "webp"]:
-        content = Image.open(path).convert('RGB')
-        choice = "image"
-        return choice, content
-
-    else:
-        raise gr.Error("Oops, unsupported files.")
-
 @spaces.GPU()
 def stream_chat(message, history: list, temperature: float, max_length: int, top_p: float, top_k: int, penalty: float):
 
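The hunk header above confirms the file builds a quantization_config = BitsAndBytesConfig(...) before loading the tokenizer, but the model load itself falls outside the diff. For orientation, a minimal sketch of how a 4-bit load of this model is commonly wired with the imports the commit keeps; the specific arguments here are assumptions, not the Space's verbatim code:

# Hypothetical sketch only: the diff confirms quantization_config exists,
# but not its arguments or the exact from_pretrained call.
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,                      # assumed: 4-bit NF4 keeps a 32B model on one GPU
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,  # assumed compute dtype
)

model = AutoModelForCausalLM.from_pretrained(
    "THUDM/GLM-4-Z1-32B-0414",              # MODEL_ID from the file
    quantization_config=quantization_config,
    device_map="auto",
    trust_remote_code=True,
)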
@@ -110,41 +51,21 @@ def stream_chat(message, history: list, temperature: float, max_length: int, top_p: float, top_k: int, penalty: float):
 
     print(f'message is - {message}')
     print(f'history is - {history}')
+
     conversation = []
-
-
-
-
-
-
-
-
-
-    if len(history) == 0:
-        contents = None
-        conversation.append({"role": "user", "content": message['text']})
-    else:
-        for prompt, answer in history:
-            if answer is None:
-                prompt_files.append(prompt[0])
-                conversation.extend([{"role": "user", "content": ""}, {"role": "assistant", "content": ""}])
-            else:
-                conversation.extend([{"role": "user", "content": prompt}, {"role": "assistant", "content": answer}])
-        if len(prompt_files) > 0:
-            choice, contents = mode_load(prompt_files[-1])
-        else:
-            choice = ""
-        conversation.append({"role": "user", "image": "", "content": message['text']})
-
-    if choice == "image":
-        conversation.append({"role": "user", "image": contents, "content": message['text']})
-    elif choice == "doc":
-        format_msg = contents + "\n\n\n" + "{} files uploaded.\n" + message['text']
-        conversation.append({"role": "user", "content": format_msg})
+    if len(history) > 0:
+        for prompt, answer in history:
+            conversation.extend([
+                {"role": "user", "content": prompt},
+                {"role": "assistant", "content": answer}
+            ])
+
+    conversation.append({"role": "user", "content": message})
+
     print(f"Conversation is -\n{conversation}")
 
     input_ids = tokenizer.apply_chat_template(conversation, tokenize=True, add_generation_prompt=True,
-
+                                              return_tensors="pt", return_dict=True).to(model.device)
     streamer = TextIteratorStreamer(tokenizer, timeout=60.0, skip_prompt=True, skip_special_tokens=True)
 
     generate_kwargs = dict(
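The hunks skip from generate_kwargs = dict( to yield buffer, so the generation loop itself is not shown. The surviving context (TextIteratorStreamer, from threading import Thread, and the streamed buffer) points at the standard transformers streaming pattern; below is a sketch under that assumption, with the sampling knobs taken from the stream_chat signature. The function name finish_stream_chat is hypothetical:

# Sketch of the elided tail of stream_chat; an assumption based on the
# TextIteratorStreamer/Thread imports, not the Space's verbatim code.
from threading import Thread

def finish_stream_chat(model, input_ids, streamer,
                       temperature, max_length, top_p, top_k, penalty):
    generate_kwargs = dict(
        **input_ids,                  # input_ids + attention_mask from apply_chat_template
        streamer=streamer,
        max_new_tokens=max_length,
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        repetition_penalty=penalty,
    )
    # Run generate() on a worker thread so tokens can be yielded as they arrive.
    Thread(target=model.generate, kwargs=generate_kwargs).start()

    buffer = ""
    for new_text in streamer:         # the streamer is itself an iterator
        buffer += new_text
        yield buffer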
@@ -168,17 +89,21 @@ def stream_chat(message, history: list, temperature: float, max_length: int, top_p: float, top_k: int, penalty: float):
         yield buffer
 
 chatbot = gr.Chatbot()
-chat_input = gr.
+chat_input = gr.Textbox(
     interactive=True,
-    placeholder="Enter message
+    placeholder="Enter your message here...",
     show_label=False,
 )
 
 EXAMPLES = [
-    [
-    [
-    [
-    [
+    ["Analyze the geopolitical implications of recent technological advancements in AI from a Chinese perspective."],
+    ["¿Cuáles son los desafíos éticos más importantes en el desarrollo de la inteligencia artificial general?"],
+    ["从经济学和社会学角度分析,人工智能将如何改变未来的就业市场?"],
+    ["ما هي التحديات الرئيسية التي تواجه تطوير الذكاء الاصطناعي في العالم العربي؟"],
+    ["नैतिक कृत्रिम बुद्धिमत्ता विकास में सबसे बड़ी चुनौतियाँ क्या हैं? विस्तार से समझाइए।"],
+    ["Кои са основните предизвикателства пред разработването на изкуствен интелект в България и Източна Европа?"],
+    ["Explain the potential risks and benefits of quantum computing in national security contexts."],
+    ["分析气候变化对全球经济不平等的影响,并提出可能的解决方案。"],
 ]
 
 with gr.Blocks(css=CSS, theme="soft", fill_height=True) as demo:
@@ -186,7 +111,6 @@ with gr.Blocks(css=CSS, theme="soft", fill_height=True) as demo:
     gr.HTML(DESCRIPTION)
     gr.ChatInterface(
         fn=stream_chat,
-        multimodal=True,
         textbox=chat_input,
         chatbot=chatbot,
         fill_height=True,
@@ -233,8 +157,8 @@ with gr.Blocks(css=CSS, theme="soft", fill_height=True) as demo:
                 render=False,
             ),
         ],
-
-
+        examples=EXAMPLES,
+    )
 
 if __name__ == "__main__":
     demo.queue(api_open=False).launch(show_api=False, share=False)
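After this commit the UI reduces to a plain text chat: a gr.Textbox feeding gr.ChatInterface, with the multilingual EXAMPLES wired in through the examples= argument. A minimal self-contained sketch of the same wiring, with a hypothetical echo_fn standing in for stream_chat so the layout can be smoke-tested without loading the 32B model:

# Self-contained UI sketch; echo_fn is a hypothetical stand-in for stream_chat.
import gradio as gr

def echo_fn(message, history):
    buffer = ""
    for ch in f"echo: {message}":   # mimic stream_chat's incremental yields
        buffer += ch
        yield buffer

chat_input = gr.Textbox(interactive=True, placeholder="Enter your message here...", show_label=False)
chatbot = gr.Chatbot()

with gr.Blocks(theme="soft", fill_height=True) as demo:
    gr.ChatInterface(
        fn=echo_fn,
        textbox=chat_input,
        chatbot=chatbot,
        fill_height=True,
        examples=[["Explain the potential risks and benefits of quantum computing in national security contexts."]],
    )

if __name__ == "__main__":
    demo.queue(api_open=False).launch(show_api=False, share=False)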
|