Update app.py
app.py
CHANGED
@@ -434,6 +434,8 @@ For more information on `huggingface_hub` Inference API support, please check th
 # if __name__ == "__main__":
 # demo.launch()# app.py
 
+# app.py
+
 import os
 import gradio as gr
 from huggingface_hub import InferenceClient
@@ -445,7 +447,7 @@ HF_TOKEN = os.getenv("HF_TOKEN")
 
 # Initialize Hugging Face Inference Client
 client = InferenceClient(
-    model="mistralai/
+    model="mistralai/Mamba-Codestral-7B-v0.1",
     token=HF_TOKEN
 )
 
@@ -457,32 +459,36 @@ system_message = (
     "based on their requirements."
 )
 
-#
+# Template for conversation history
+def format_prompt(message, history):
+    prompt = f"System: {system_message}\n"
+    for user_msg, bot_msg in history:
+        prompt += f"User: {user_msg}\n"
+        if bot_msg:
+            prompt += f"Assistant: {bot_msg}\n"
+    prompt += f"User: {message}\nAssistant:"
+    return prompt
+
+# Streaming chatbot logic using text generation
 def respond(message, history):
-
-    messages = [{"role": "system", "content": system_message}]
-    for msg in history:
-        messages.append(msg)
-    messages.append({"role": "user", "content": message})
+    prompt = format_prompt(message, history)
 
-    # Stream response from the model
     response = ""
-    for chunk in client.
-
-
-        max_tokens=1024,
+    for chunk in client.text_generation(
+        prompt=prompt,
+        max_new_tokens=1024,
         temperature=0.7,
         top_p=0.95,
-
+        stop_sequences=["\nUser:", "\nSystem:"],
+        stream=True
     ):
-
-        response += token
+        response += chunk
         yield response
 
 # Create Gradio interface
 with gr.Blocks() as demo:
-    chatbot = gr.Chatbot(type='messages') #
-    gr.ChatInterface(fn=respond, chatbot=chatbot, type="messages")
+    chatbot = gr.Chatbot(type='messages') # Still use modern message format
+    gr.ChatInterface(fn=respond, chatbot=chatbot, type="messages")
 
 # Launch app
 if __name__ == "__main__":
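A caveat worth noting on the new prompt template: with `gr.ChatInterface(..., type="messages")`, Gradio passes `history` to the callback as a list of `{"role": ..., "content": ...}` dicts, while `format_prompt` above unpacks `(user_msg, bot_msg)` tuples, which matches the older tuple-style history. A minimal sketch of a dict-aware variant, assuming the messages format; the `system_message` value here is a placeholder, not the Space's actual prompt:

system_message = "You are a helpful coding assistant."  # placeholder

def format_prompt_messages(message, history):
    # history arrives as [{"role": "user", "content": ...}, {"role": "assistant", "content": ...}, ...]
    prompt = f"System: {system_message}\n"
    for turn in history:
        speaker = "User" if turn["role"] == "user" else "Assistant"
        prompt += f"{speaker}: {turn['content']}\n"
    prompt += f"User: {message}\nAssistant:"
    return prompt

# Quick check with a two-turn history:
history = [
    {"role": "user", "content": "Write hello world in Python."},
    {"role": "assistant", "content": 'print("Hello, world!")'},
]
print(format_prompt_messages("Now in Go, please.", history))
# System: You are a helpful coding assistant.
# User: Write hello world in Python.
# Assistant: print("Hello, world!")
# User: Now in Go, please.
# Assistant: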
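For reference, the accumulate-and-yield pattern in `respond` is what makes the reply stream in the UI: `client.text_generation(..., stream=True)` yields text fragments, and each `yield response` hands Gradio the full message so far, which it re-renders in place. A sketch with the network call stubbed out so the pattern runs offline; `fake_stream` is hypothetical, standing in for the real client call:

def fake_stream(prompt):
    # Stand-in for client.text_generation(prompt=..., stream=True),
    # which yields generated text piece by piece.
    for piece in ["def add(a, b):", "\n    ", "return a + b"]:
        yield piece

def respond(message, history):
    response = ""
    for chunk in fake_stream(message):
        response += chunk
        yield response  # Gradio replaces the bot message with each partial result

for partial in respond("Write an add function.", []):
    print(repr(partial))
# 'def add(a, b):'
# 'def add(a, b):\n    '
# 'def add(a, b):\n    return a + b'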