Update app.py
app.py
CHANGED
@@ -65,38 +65,38 @@ def format_conversation(history, system_prompt):
     Flatten chat history and system prompt into a single string.
     """
     prompt = system_prompt.strip() + "\n"
-
-
-
-
-
-
-
+
+    for turn in history:
+        user_msg, assistant_msg = turn
+        prompt += "User: " + user_msg.strip() + "\n"
+        if assistant_msg:  # might be None or empty
+            prompt += "Assistant: " + assistant_msg.strip() + "\n"
+
     if not prompt.strip().endswith("Assistant:"):
         prompt += "Assistant: "
     return prompt
 
 @spaces.GPU(duration=60)
-def chat_response(user_msg, chat_history, system_prompt,
-
-
+def chat_response(user_msg, history, system_prompt,
+                  model_name, max_tokens, temperature,
+                  top_k, top_p, repeat_penalty):
     """
-    Generates streaming chat responses.
+    Generates streaming chat responses using the standard (user, assistant) format.
     """
     cancel_event.clear()
-
-
-
-
-
-
+
+    # Add the user message to history
+    history = history + [[user_msg, None]]
+
+    # Format the conversation for the model
+    prompt = format_conversation(history, system_prompt)
+
     try:
-        prompt = format_conversation(history, system_prompt)
-
         pipe = load_pipeline(model_name)
         streamer = TextIteratorStreamer(pipe.tokenizer,
                                         skip_prompt=True,
                                         skip_special_tokens=True)
+
         gen_thread = threading.Thread(
             target=pipe,
             args=(prompt,),
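For a quick sanity check of the new flattening logic, here is a standalone copy of the rewritten format_conversation run on a toy history (the system prompt and sample turns are made-up test data, not part of this commit):

def format_conversation(history, system_prompt):
    # Flatten chat history and system prompt into a single string.
    prompt = system_prompt.strip() + "\n"
    for turn in history:
        user_msg, assistant_msg = turn
        prompt += "User: " + user_msg.strip() + "\n"
        if assistant_msg:  # might be None or empty
            prompt += "Assistant: " + assistant_msg.strip() + "\n"
    if not prompt.strip().endswith("Assistant:"):
        prompt += "Assistant: "
    return prompt

history = [["Hi there", "Hello! How can I help?"], ["Tell me a joke", None]]
print(format_conversation(history, "You are a helpful assistant."))
# You are a helpful assistant.
# User: Hi there
# Assistant: Hello! How can I help?
# User: Tell me a joke
# Assistant: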
@@ -112,16 +112,18 @@ def chat_response(user_msg, chat_history, system_prompt,
         )
         gen_thread.start()
 
+        # Stream the response
         assistant_text = ''
         for chunk in streamer:
             if cancel_event.is_set():
                 break
             assistant_text += chunk
-            history[-1][
+            history[-1][1] = assistant_text
             yield history
+
         gen_thread.join()
     except Exception as e:
-        history[-1][
+        history[-1][1] = f"Error: {e}"
         yield history
     finally:
         gc.collect()
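The streaming pattern in this hunk (a blocking pipeline call in a background thread, with the main thread draining a TextIteratorStreamer) can be sketched independently of the app; the model id, prompt, and generation kwargs below are placeholders, not values from this commit:

import threading
from transformers import pipeline, TextIteratorStreamer

pipe = pipeline("text-generation", model="Qwen/Qwen3-0.6B")  # placeholder model
streamer = TextIteratorStreamer(pipe.tokenizer,
                                skip_prompt=True,          # don't re-emit the prompt
                                skip_special_tokens=True)  # drop EOS/BOS markers

# The pipeline call blocks until generation finishes, so it runs in a
# worker thread while the main thread consumes decoded chunks as they arrive.
gen_thread = threading.Thread(target=pipe,
                              args=("User: Hi\nAssistant: ",),
                              kwargs={"streamer": streamer,
                                      "max_new_tokens": 64})
gen_thread.start()

assistant_text = ""
for chunk in streamer:   # blocks until the next chunk is ready
    assistant_text += chunk
    print(chunk, end="", flush=True)
gen_thread.join()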
@@ -187,12 +189,13 @@ css = """
 def get_model_name(full_selection):
     return full_selection.split(" - ")[0]
 
-# Function to
-def
-    return
-
-
-
+# Function to clear chat
+def clear_chat():
+    return [], ""
+
+# Function to handle message submission and clear input
+def submit_message(user_input, history, system_prompt, model_name, max_tokens, temp, k, p, rp):
+    return "", history + [[user_input, None]]
 
 # ------------------------------
 # Gradio UI
@@ -205,6 +208,8 @@ with gr.Blocks(title="Qwen3 Chat", css=css) as demo:
     </div>
     """)
 
+    chatbot = gr.Chatbot(height=500)
+
     with gr.Row():
         with gr.Column(scale=3):
             with gr.Group(elem_classes="qwen-container"):
@@ -232,9 +237,8 @@ with gr.Blocks(title="Qwen3 Chat", css=css) as demo:
                 cnl = gr.Button("Cancel Generation", elem_classes="button-secondary")
 
         with gr.Column(scale=7):
-            chat = gr.Chatbot(type="messages", height=500)
             with gr.Row():
-
+                msg = gr.Textbox(
                     placeholder="Type your message and press Enter...",
                     lines=2,
                     show_label=False
@@ -248,23 +252,36 @@ with gr.Blocks(title="Qwen3 Chat", css=css) as demo:
     """)
 
     # Event handlers
-    clr.click(fn=
+    clr.click(fn=clear_chat, outputs=[chatbot, msg])
     cnl.click(fn=cancel_generation)
 
-    # Handle
-
+    # Handle sending messages and generating responses
+    msg.submit(
         fn=submit_message,
-        inputs=[
-        outputs=[
-
+        inputs=[msg, chatbot, sys_prompt, model_dd, max_tok, temp, k, p, rp],
+        outputs=[msg, chatbot]
+    ).then(
+        fn=lambda history, prompt, model, tok, temp, k, p, rp:
+            chat_response(
+                history[-1][0], history[:-1], prompt,
+                get_model_name(model), tok, temp, k, p, rp
+            ),
+        inputs=[chatbot, sys_prompt, model_dd, max_tok, temp, k, p, rp],
+        outputs=chatbot
     )
 
-    # Handle submission from Send button
     send_btn.click(
         fn=submit_message,
-        inputs=[
-        outputs=[
-
+        inputs=[msg, chatbot, sys_prompt, model_dd, max_tok, temp, k, p, rp],
+        outputs=[msg, chatbot]
+    ).then(
+        fn=lambda history, prompt, model, tok, temp, k, p, rp:
+            chat_response(
+                history[-1][0], history[:-1], prompt,
+                get_model_name(model), tok, temp, k, p, rp
+            ),
+        inputs=[chatbot, sys_prompt, model_dd, max_tok, temp, k, p, rp],
+        outputs=chatbot
     )
 
 if __name__ == "__main__":
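For reference, the submit-then-stream wiring this commit sets up reduces to a minimal, self-contained Gradio sketch; add_user_turn mirrors submit_message, the echo generator is a stand-in for chat_response, and it assumes a Gradio version where the Chatbot's default history format is a list of [user, assistant] pairs (matching the tuple-style history used above):

import gradio as gr

def add_user_turn(user_input, history):
    # Clear the textbox and append the new turn with an empty assistant slot
    return "", history + [[user_input, None]]

def respond(history):
    # Stand-in for chat_response: stream an echo back character by character
    reply = "Echo: " + history[-1][0]
    for i in range(1, len(reply) + 1):
        history[-1][1] = reply[:i]
        yield history

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(height=500)
    msg = gr.Textbox(placeholder="Type your message and press Enter...")
    # The first step swaps the textbox content for an updated history;
    # .then() chains the (streaming) responder once that update is applied.
    msg.submit(add_user_turn, inputs=[msg, chatbot], outputs=[msg, chatbot]).then(
        respond, inputs=chatbot, outputs=chatbot
    )

if __name__ == "__main__":
    demo.launch()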
|