import sys

import gradio as gr
from llama_cpp import Llama

# Provide the relative path to the first shard of the model;
# llama.cpp locates the remaining shards automatically.
model_path = "DeepSeek-R1-Zero-Q4_K_M/DeepSeek-R1-Zero-Q4_K_M-00001-of-00009.gguf"

# Initialize the model
try:
    model = Llama(model_path=model_path, n_threads=8)
except ValueError as e:
    print(f"Error initializing the model: {e}")
    sys.exit(1)

# Define the prediction function
def predict(prompt):
    try:
        # Generate a completion; max_tokens raises the small default limit
        output = model(prompt, max_tokens=512)
        # The completion text lives in the first choice of the response dict
        return output["choices"][0]["text"]
    except Exception as e:
        return f"Error during inference: {e}"

# Create the Gradio interface
iface = gr.Interface(
    fn=predict,
    inputs="text",
    outputs="text",
    title="DeepSeek-R1-Zero",
    description="A Gradio interface for the DeepSeek-R1-Zero model",
)

if __name__ == "__main__":
    iface.launch()
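
Once the app is running, it can also be queried programmatically rather than through the browser UI. Below is a minimal sketch using the gradio_client package, assuming the app is served at Gradio's default local address (http://127.0.0.1:7860) and exposes the default /predict endpoint; the prompt string is just an illustrative example.

import gradio_client

# Connect to the locally running Gradio app (default address assumed)
client = gradio_client.Client("http://127.0.0.1:7860/")

# Send a prompt to the single text-in/text-out endpoint and print the reply
result = client.predict(
    "Explain quantization in one sentence.",  # hypothetical example prompt
    api_name="/predict",
)
print(result)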