nikravan committed
Commit 5ebc32e · verified · 1 Parent(s): 466bfc4

Update app.py

Files changed (1):
  1. app.py +25 -101
app.py CHANGED
@@ -1,28 +1,21 @@
 import torch
-from PIL import Image
 import gradio as gr
 import spaces
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer, BitsAndBytesConfig
 import os
 from threading import Thread
 
-import pymupdf
-import docx
-from pptx import Presentation
-from_tf=True
-
-
 MODEL_LIST = ["THUDM/GLM-4-Z1-32B-0414"]
 
 HF_TOKEN = os.environ.get("HF_TOKEN", None)
 MODEL_ID = MODEL_LIST[0]
 MODEL_NAME = "GLM-4-Z1-32B-0414"
 
-TITLE = "<h1>3ML-bot</h1>"
+TITLE = "<h1>3ML-bot (Text Only)</h1>"
 
 DESCRIPTION = f"""
 <center>
-<p>😊 A Multi-Modal Multi-Lingual(3ML) Chat.
+<p>😊 A Multi-Lingual Analytical Chatbot.
 <br>
 🚀 MODEL NOW: <a href="https://hf.co/nikravan/glm-4vq">{MODEL_NAME}</a>
 </center>"""
@@ -44,58 +37,6 @@ quantization_config = BitsAndBytesConfig(
 
 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
 
-def extract_text(path):
-    return open(path, 'r').read()
-
-def extract_pdf(path):
-    doc = pymupdf.open(path)
-    text = ""
-    for page in doc:
-        text += page.get_text()
-    return text
-
-def extract_docx(path):
-    doc = docx.Document(path)
-    data = []
-    for paragraph in doc.paragraphs:
-        data.append(paragraph.text)
-    content = '\n\n'.join(data)
-    return content
-
-def extract_pptx(path):
-    prs = Presentation(path)
-    text = ""
-    for slide in prs.slides:
-        for shape in slide.shapes:
-            if hasattr(shape, "text"):
-                text += shape.text + "\n"
-    return text
-
-def mode_load(path):
-    choice = ""
-    file_type = path.split(".")[-1]
-    print(file_type)
-    if file_type in ["pdf", "txt", "py", "docx", "pptx", "json", "cpp", "md"]:
-        if file_type.endswith("pdf"):
-            content = extract_pdf(path)
-        elif file_type.endswith("docx"):
-            content = extract_docx(path)
-        elif file_type.endswith("pptx"):
-            content = extract_pptx(path)
-        else:
-            content = extract_text(path)
-        choice = "doc"
-        print(content[:100])
-        return choice, content[:5000]
-
-    elif file_type in ["png", "jpg", "jpeg", "bmp", "tiff", "webp"]:
-        content = Image.open(path).convert('RGB')
-        choice = "image"
-        return choice, content
-
-    else:
-        raise gr.Error("Oops, unsupported files.")
-
 @spaces.GPU()
 def stream_chat(message, history: list, temperature: float, max_length: int, top_p: float, top_k: int, penalty: float):
 
@@ -110,41 +51,21 @@ def stream_chat(message, history: list, temperature: float, max_length: int, top
 
     print(f'message is - {message}')
     print(f'history is - {history}')
+
     conversation = []
-    prompt_files = []
-    if message["files"]:
-        choice, contents = mode_load(message["files"][-1])
-        if choice == "image":
-            conversation.append({"role": "user", "image": contents, "content": message['text']})
-        elif choice == "doc":
-            format_msg = contents + "\n\n\n" + "{} files uploaded.\n" + message['text']
-            conversation.append({"role": "user", "content": format_msg})
-    else:
-        if len(history) == 0:
-            contents = None
-            conversation.append({"role": "user", "content": message['text']})
-        else:
-            for prompt, answer in history:
-                if answer is None:
-                    prompt_files.append(prompt[0])
-                    conversation.extend([{"role": "user", "content": ""}, {"role": "assistant", "content": ""}])
-                else:
-                    conversation.extend([{"role": "user", "content": prompt}, {"role": "assistant", "content": answer}])
-            if len(prompt_files) > 0:
-                choice, contents = mode_load(prompt_files[-1])
-            else:
-                choice = ""
-                conversation.append({"role": "user", "image": "", "content": message['text']})
-
-        if choice == "image":
-            conversation.append({"role": "user", "image": contents, "content": message['text']})
-        elif choice == "doc":
-            format_msg = contents + "\n\n\n" + "{} files uploaded.\n" + message['text']
-            conversation.append({"role": "user", "content": format_msg})
+    if len(history) > 0:
+        for prompt, answer in history:
+            conversation.extend([
+                {"role": "user", "content": prompt},
+                {"role": "assistant", "content": answer}
+            ])
+
+    conversation.append({"role": "user", "content": message})
+
     print(f"Conversation is -\n{conversation}")
 
     input_ids = tokenizer.apply_chat_template(conversation, tokenize=True, add_generation_prompt=True,
-                                             return_tensors="pt", return_dict=True).to(model.device)
+                                              return_tensors="pt", return_dict=True).to(model.device)
     streamer = TextIteratorStreamer(tokenizer, timeout=60.0, skip_prompt=True, skip_special_tokens=True)
 
     generate_kwargs = dict(
@@ -168,17 +89,21 @@ def stream_chat(message, history: list, temperature: float, max_length: int, top
         yield buffer
 
 chatbot = gr.Chatbot()
-chat_input = gr.MultimodalTextbox(
+chat_input = gr.Textbox(
     interactive=True,
-    placeholder="Enter message or upload a file ...",
+    placeholder="Enter your message here...",
     show_label=False,
 )
 
 EXAMPLES = [
-    [{"text": "Write a poem about spring season in French Language", }],
-    [{"text": "what does this chart mean?", "files": ["sales.png"]}],
-    [{"text": "¿Qué está escrito a mano en esta foto?", "files": ["receipt1.png"]}],
-    [{"text": "در مورد این عکس توضیح بده و بگو این چه فصلی می تواند باشد", "files": ["nature.jpg"]}]
+    ["Analyze the geopolitical implications of recent technological advancements in AI from a Chinese perspective."],
+    ["¿Cuáles son los desafíos éticos más importantes en el desarrollo de la inteligencia artificial general?"],
+    ["从经济学和社会学角度分析,人工智能将如何改变未来的就业市场?"],
+    ["ما هي التحديات الرئيسية التي تواجه تطوير الذكاء الاصطناعي في العالم العربي؟"],
+    ["नैतिक कृत्रिम बुद्धिमत्ता विकास में सबसे बड़ी चुनौतियाँ क्या हैं? विस्तार से समझाइए।"],
+    ["Кои са основните предизвикателства пред разработването на изкуствен интелект в България и Източна Европа?"],
+    ["Explain the potential risks and benefits of quantum computing in national security contexts."],
+    ["分析气候变化对全球经济不平等的影响,并提出可能的解决方案。"],
 ]
 
 with gr.Blocks(css=CSS, theme="soft", fill_height=True) as demo:
@@ -186,7 +111,6 @@ with gr.Blocks(css=CSS, theme="soft", fill_height=True) as demo:
     gr.HTML(DESCRIPTION)
     gr.ChatInterface(
         fn=stream_chat,
-        multimodal=True,
         textbox=chat_input,
         chatbot=chatbot,
         fill_height=True,
@@ -233,8 +157,8 @@ with gr.Blocks(css=CSS, theme="soft", fill_height=True) as demo:
                 render=False,
             ),
         ],
-    ),
-    gr.Examples(EXAMPLES, [chat_input])
+        examples=EXAMPLES,
+    )
 
 if __name__ == "__main__":
     demo.queue(api_open=False).launch(show_api=False, share=False)
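The heart of this commit is the simplified prompt construction: the multimodal file-routing logic is replaced by a plain flattening of Gradio's (prompt, answer) history pairs into chat-template messages. A minimal, self-contained sketch of that flow follows; the helper name build_conversation is hypothetical, not defined in app.py:

    # Sketch of the new text-only prompt construction (illustrative only;
    # build_conversation is a hypothetical helper, not part of the commit).
    def build_conversation(message: str, history: list) -> list:
        """Flatten Gradio (prompt, answer) pairs into chat-template messages."""
        conversation = []
        for prompt, answer in history:
            conversation.extend([
                {"role": "user", "content": prompt},
                {"role": "assistant", "content": answer},
            ])
        # The current turn goes last; apply_chat_template's
        # add_generation_prompt=True then appends the assistant header.
        conversation.append({"role": "user", "content": message})
        return conversation

    if __name__ == "__main__":
        history = [("Hi", "Hello! How can I help?")]
        print(build_conversation("Summarize our chat so far.", history))
        # [{'role': 'user', 'content': 'Hi'},
        #  {'role': 'assistant', 'content': 'Hello! How can I help?'},
        #  {'role': 'user', 'content': 'Summarize our chat so far.'}]

Because history arrives as plain strings from gr.Textbox rather than MultimodalTextbox dicts, none of the image/document branches are needed here.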
 
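The hunks elide the generation loop between generate_kwargs = dict( and yield buffer. Given the visible context (a TextIteratorStreamer and the sampling parameters in stream_chat's signature), it presumably follows the standard transformers streaming pattern sketched below; the exact kwargs are assumptions, not lifted from the commit:

    # Hypothetical reconstruction of the elided streaming loop. Only
    # streamer, generate_kwargs, and buffer appear in the diff context;
    # the individual kwargs are illustrative assumptions.
    generate_kwargs = dict(
        **input_ids,                # tokenized chat template (dict of tensors)
        streamer=streamer,          # TextIteratorStreamer from the visible context
        max_new_tokens=max_length,
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        repetition_penalty=penalty,
    )
    # model.generate runs in a worker thread so the streamer can be
    # consumed incrementally on the main thread.
    thread = Thread(target=model.generate, kwargs=generate_kwargs)
    thread.start()
    buffer = ""
    for new_text in streamer:
        buffer += new_text
        yield buffer  # Gradio re-renders the partial reply on each yield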