openfree committed on
Commit 31fd291 · verified · 1 Parent(s): c5529eb

Update app.py

Files changed (1)
  1. app.py +67 -88
app.py CHANGED
@@ -66,65 +66,42 @@ def format_conversation(history, system_prompt):
     """
     prompt = system_prompt.strip() + "\n"
 
-    for turn in history:
-        user_msg, assistant_msg = turn
+    for user_msg, assistant_msg in history:
         prompt += "User: " + user_msg.strip() + "\n"
         if assistant_msg:  # might be None or empty
             prompt += "Assistant: " + assistant_msg.strip() + "\n"
 
-    if not prompt.strip().endswith("Assistant:"):
-        prompt += "Assistant: "
+    prompt += "Assistant: "
     return prompt
 
-@spaces.GPU(duration=60)
-def chat_response(user_msg, history, system_prompt,
-                  model_name, max_tokens, temperature,
-                  top_k, top_p, repeat_penalty):
+def generate_response(user_input, history, system_prompt, model_name, max_tokens, temperature, top_k, top_p, repeat_penalty):
     """
-    Generates streaming chat responses using the standard (user, assistant) format.
+    Generate a complete response (non-streaming).
     """
     cancel_event.clear()
+    full_history = history.copy() + [(user_input, None)]  # include the new user message in the prompt
 
-    # Add the user message to history
-    history = history + [[user_msg, None]]
-
-    # Format the conversation for the model
-    prompt = format_conversation(history, system_prompt)
+    # Format conversation for the model
+    conversation = format_conversation(full_history, system_prompt)
 
     try:
         pipe = load_pipeline(model_name)
-        streamer = TextIteratorStreamer(pipe.tokenizer,
-                                        skip_prompt=True,
-                                        skip_special_tokens=True)
-
-        gen_thread = threading.Thread(
-            target=pipe,
-            args=(prompt,),
-            kwargs={
-                'max_new_tokens': max_tokens,
-                'temperature': temperature,
-                'top_k': top_k,
-                'top_p': top_p,
-                'repetition_penalty': repeat_penalty,
-                'streamer': streamer,
-                'return_full_text': False
-            }
-        )
-        gen_thread.start()
-
-        # Stream the response
-        assistant_text = ''
-        for chunk in streamer:
-            if cancel_event.is_set():
-                break
-            assistant_text += chunk
-            history[-1][1] = assistant_text
-            yield history
-
-        gen_thread.join()
+        output = pipe(
+            conversation,
+            max_new_tokens=max_tokens,
+            temperature=temperature,
+            top_k=top_k,
+            top_p=top_p,
+            repetition_penalty=repeat_penalty,
+            return_full_text=False
+        )[0]["generated_text"]
+
+        # Return the updated history
+        history.append((user_input, output))
+        return history
     except Exception as e:
-        history[-1][1] = f"Error: {e}"
-        yield history
+        history.append((user_input, f"Error: {e}"))
+        return history
     finally:
         gc.collect()
 
@@ -189,14 +166,6 @@ css = """
 def get_model_name(full_selection):
     return full_selection.split(" - ")[0]
 
-# Function to clear chat
-def clear_chat():
-    return [], ""
-
-# Function to handle message submission and clear input
-def submit_message(user_input, history, system_prompt, model_name, max_tokens, temp, k, p, rp):
-    return "", history + [[user_input, None]]
-
 # ------------------------------
 # Gradio UI
 # ------------------------------
@@ -208,8 +177,6 @@ with gr.Blocks(title="Qwen3 Chat", css=css) as demo:
     </div>
     """)
 
-    chatbot = gr.Chatbot(height=500)
-
     with gr.Row():
         with gr.Column(scale=3):
             with gr.Group(elem_classes="qwen-container"):
@@ -232,18 +199,17 @@ with gr.Blocks(title="Qwen3 Chat", css=css) as demo:
                 k = gr.Slider(1, 100, value=40, step=1, label="Top-K")
                 rp = gr.Slider(1.0, 2.0, value=1.1, step=0.1, label="Repetition Penalty")
 
-                with gr.Row():
-                    clr = gr.Button("Clear Chat", elem_classes="button-secondary")
-                    cnl = gr.Button("Cancel Generation", elem_classes="button-secondary")
+                clear_btn = gr.Button("Clear Chat", elem_classes="button-secondary")
 
         with gr.Column(scale=7):
+            chatbot = gr.Chatbot()
            with gr.Row():
-                msg = gr.Textbox(
-                    placeholder="Type your message and press Enter...",
-                    lines=2,
-                    show_label=False
+                txt = gr.Textbox(
+                    show_label=False,
+                    placeholder="Type your message here...",
+                    lines=2
                 )
-                send_btn = gr.Button("Send", variant="primary", elem_classes="button-primary")
+                submit_btn = gr.Button("Send", variant="primary", elem_classes="button-primary")
 
     gr.HTML("""
     <div class="footer">
@@ -251,38 +217,51 @@ with gr.Blocks(title="Qwen3 Chat", css=css) as demo:
     </div>
     """)
 
-    # Event handlers
-    clr.click(fn=clear_chat, outputs=[chatbot, msg])
-    cnl.click(fn=cancel_generation)
+    # Define event handlers
+    def user_input(user_message, history):
+        return "", history + [(user_message, None)]
 
-    # Handle sending messages and generating responses
-    msg.submit(
-        fn=submit_message,
-        inputs=[msg, chatbot, sys_prompt, model_dd, max_tok, temp, k, p, rp],
-        outputs=[msg, chatbot]
+    def bot_response(history, sys_prompt, model, max_tok, temp, k, p, rp):
+        user_message = history[-1][0]
+        bot_message = generate_response(
+            user_message,
+            history[:-1],
+            sys_prompt,
+            get_model_name(model),
+            max_tok,
+            temp,
+            k,
+            p,
+            rp
+        )[-1][1]
+
+        history[-1] = (user_message, bot_message)
+        return history
+
+    # Connect everything
+    submit_btn.click(
+        user_input,
+        [txt, chatbot],
+        [txt, chatbot],
+        queue=False
     ).then(
-        fn=lambda history, prompt, model, tok, temp, k, p, rp:
-            chat_response(
-                history[-1][0], history[:-1], prompt,
-                get_model_name(model), tok, temp, k, p, rp
-            ),
-        inputs=[chatbot, sys_prompt, model_dd, max_tok, temp, k, p, rp],
-        outputs=chatbot
+        bot_response,
+        [chatbot, sys_prompt, model_dd, max_tok, temp, k, p, rp],
+        [chatbot]
     )
 
-    send_btn.click(
-        fn=submit_message,
-        inputs=[msg, chatbot, sys_prompt, model_dd, max_tok, temp, k, p, rp],
-        outputs=[msg, chatbot]
+    txt.submit(
+        user_input,
+        [txt, chatbot],
+        [txt, chatbot],
+        queue=False
     ).then(
-        fn=lambda history, prompt, model, tok, temp, k, p, rp:
-            chat_response(
-                history[-1][0], history[:-1], prompt,
-                get_model_name(model), tok, temp, k, p, rp
-            ),
-        inputs=[chatbot, sys_prompt, model_dd, max_tok, temp, k, p, rp],
-        outputs=chatbot
+        bot_response,
+        [chatbot, sys_prompt, model_dd, max_tok, temp, k, p, rp],
+        [chatbot]
     )
+
+    clear_btn.click(lambda: None, None, chatbot, queue=False)
 
 if __name__ == "__main__":
     demo.launch()
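For reference, a minimal sketch (not part of the commit; the history and system prompt below are made-up examples) of the prompt layout the updated format_conversation builds:

# Illustrative only: mirrors the updated format_conversation from the diff above.
def format_conversation(history, system_prompt):
    prompt = system_prompt.strip() + "\n"
    for user_msg, assistant_msg in history:
        prompt += "User: " + user_msg.strip() + "\n"
        if assistant_msg:  # skip turns that have no assistant reply yet
            prompt += "Assistant: " + assistant_msg.strip() + "\n"
    prompt += "Assistant: "
    return prompt

# Hypothetical two-turn history, with the latest turn still unanswered
history = [("Hello", "Hi! How can I help?"), ("What can you do?", None)]
print(format_conversation(history, "You are a helpful assistant."))
# You are a helpful assistant.
# User: Hello
# Assistant: Hi! How can I help?
# User: What can you do?
# Assistant:

Because the trailing "Assistant: " cue is now appended unconditionally, the old endswith("Assistant:") check could be dropped.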
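Likewise, a minimal sketch of the single, non-streaming text-generation pipeline call that generate_response now makes in place of the threaded TextIteratorStreamer loop. The model id and prompt are placeholders, and do_sample=True is an assumption added here so the sampling parameters take effect:

# Sketch only: stands in for load_pipeline() plus the new single pipe(...) call.
from transformers import pipeline

pipe = pipeline("text-generation", model="Qwen/Qwen2.5-0.5B-Instruct")  # placeholder model id

prompt = "You are a helpful assistant.\nUser: Hello\nAssistant: "
output = pipe(
    prompt,
    max_new_tokens=64,
    do_sample=True,          # needed for temperature/top_k/top_p to apply
    temperature=0.7,
    top_k=40,
    top_p=0.9,
    repetition_penalty=1.1,
    return_full_text=False,  # keep only the newly generated continuation
)[0]["generated_text"]
print(output)

With return_full_text=False the pipeline returns only the generated continuation, which the app then appends to the chat history as the assistant turn.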