Spaces:

codewithdark
/

LatentRecurrentDepthLM

Sleeping

App Files Files Community

codewithdark committed on Feb 25

Commit

d86d806

verified ·

1 Parent(s): 2dc2198

Update app.py

Browse files

Files changed (1) hide show

app.py +29 -55

app.py CHANGED Viewed

@@ -1,66 +1,40 @@
 import gradio as gr
-from huggingface_hub import InferenceClient
-from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
-# Initialize Hugging Face Inference API client
-hf_client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
-# Load the second model (local)
-local_model_name = "codewithdark/latent-recurrent-depth-lm"
-tokenizer = AutoTokenizer.from_pretrained(local_model_name)
-model = AutoModelForCausalLM.from_pretrained(local_model_name, trust_remote_code=True)
 device = "cuda" if torch.cuda.is_available() else "cpu"
-model.to(device).eval()  # Set model to evaluation mode
-def generate_response(
-    message, history: list[tuple[str, str]], system_message, max_tokens, temperature, top_p, model_choice
-):
-    messages = [{"role": "system", "content": system_message}]
-    for val in history:
-        if val[0]:
-            messages.append({"role": "user", "content": val[0]})
-        if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
-    messages.append({"role": "user", "content": message})
-    if model_choice == "Zephyr-7B (API)":
-        response = ""
-        try:
-            for message in hf_client.chat_completion(
-                messages=messages,
-                max_tokens=max_tokens,
-                stream=True,
-                temperature=temperature,
-                top_p=top_p,
-            ):
-                token = message.choices[0].delta.content if message.choices else ""
-                response += token
-                yield response
-        except Exception as e:
-            yield f"Error in API response: {e}"
-    else:
-        input_text = tokenizer.apply_chat_template(messages, return_tensors="pt").to(device)
-        with torch.no_grad():
-            output = model.generate(input_text, max_length=max_tokens, temperature=temperature, top_p=top_p)
-        response = tokenizer.decode(output[0], skip_special_tokens=True).strip()
-        for i in range(len(response)):
-            yield response[: i + 1]
-# Gradio UI
-demo = gr.ChatInterface(
-    generate_response,
-    additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
-        gr.Radio(["Zephyr-7B (API)", "Latent Recurrent Depth LM"], value="Zephyr-7B (API)", label="Select Model"),
-    ],
-)
 if __name__ == "__main__":
     demo.launch()

 import gradio as gr
 import torch
+from transformers import AutoModel, AutoTokenizer
+# Load the local model
+model_name = "codewithdark/latent-recurrent-depth-lm"
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModel.from_pretrained(model_name, trust_remote_code=True)
 device = "cuda" if torch.cuda.is_available() else "cpu"
+model.to(device).eval()  # Set to evaluation mode
+# Define inference function
+def chat_with_model(input_text, model_choice):
+    if model_choice == "Latent Recurrent Depth LM":
+        input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to(device)
+        with torch.no_grad():
+            output = model.generate(input_ids, max_length=512)
+        response = tokenizer.decode(output[0], skip_special_tokens=True)
+        return response
+    return "Model not available yet!"
+# Create Gradio Interface
+with gr.Blocks() as demo:
+    gr.Markdown("# 🤖 Chat with Latent Recurrent Depth LM")
+    model_choice = gr.Radio(
+        ["Latent Recurrent Depth LM"],  # Add more models if needed
+        label="Select Model",
+        value="Latent Recurrent Depth LM"
+    )
+    text_input = gr.Textbox(label="Enter your message")
+    submit_button = gr.Button("Generate Response")
+    output_text = gr.Textbox(label="Model Response")
+    submit_button.click(fn=chat_with_model, inputs=[text_input, model_choice], outputs=output_text)
+# Launch the Gradio app
 if __name__ == "__main__":
     demo.launch()