import sys

import gradio as gr
from llama_cpp import Llama

# Provide the relative path to the first shard of the model;
# llama.cpp locates the remaining shards automatically.
model_path = "DeepSeek-R1-Zero-Q4_K_M/DeepSeek-R1-Zero-Q4_K_M-00001-of-00009.gguf"

# Initialize the model
try:
    model = Llama(model_path=model_path, n_threads=8)
except ValueError as e:
    print(f"Error initializing the model: {e}")
    sys.exit(1)

# Define the prediction function
def predict(prompt):
    try:
        # Generate a completion; max_tokens raises the small default limit
        output = model(prompt, max_tokens=512)
        # The completion text lives in the first choice of the response dict
        return output["choices"][0]["text"]
    except Exception as e:
        return f"Error during inference: {e}"

# Create the Gradio interface
iface = gr.Interface(
    fn=predict,
    inputs="text",
    outputs="text",
    title="DeepSeek-R1-Zero",
    description="A Gradio interface for the DeepSeek-R1-Zero model",
)

if __name__ == "__main__":
    iface.launch()
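
Once the app is running, it can also be queried programmatically rather than through the browser UI. Below is a minimal sketch using the gradio_client package, assuming the app is served at Gradio's default local address (http://127.0.0.1:7860) and exposes the default /predict endpoint; the prompt string is just an illustrative example.

import gradio_client

# Connect to the locally running Gradio app (default address assumed)
client = gradio_client.Client("http://127.0.0.1:7860/")

# Send a prompt to the single text-in/text-out endpoint and print the reply
result = client.predict(
    "Explain quantization in one sentence.",  # hypothetical example prompt
    api_name="/predict",
)
print(result)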