import gradio as gr
from llama_cpp import Llama
import os

# Path to the first shard of the model
model_path = "DeepSeek-R1-Zero-Q4_K_M/DeepSeek-R1-Zero-Q4_K_M-00001-of-00009.gguf"

# Debugging: verify the working directory and the full model path
print("Current working directory:", os.getcwd())
print("Full model path:", os.path.join(os.getcwd(), model_path))

# Initialize the model
try:
    model = Llama(model_path=model_path, n_threads=8)
except ValueError as e:
    print(f"Error initializing the model: {e}")
    exit(1)

# Define the prediction function
def predict(prompt):
    try:
        # Generate output using the model
        output = model(prompt)
        # Extract and return the text from the response
        return output["choices"][0]["text"]
    except Exception as e:
        return f"Error during inference: {e}"

# Create the Gradio interface
iface = gr.Interface(
    fn=predict,
    inputs="text",
    outputs="text",
    title="DeepSeek-R1-Zero",
    description="A Gradio interface for the DeepSeek-R1-Zero model",
)

if __name__ == "__main__":
    iface.launch()