#!/bin/bash
# Qwen3-4B Tool Calling with llama-cpp-python
# This script sets up and runs the model for local inference
#
# Usage:
#   ./run_model.sh         (requires chmod +x)
#   bash ./run_model.sh    (no chmod needed)

echo "🚀 Qwen3-4B Tool Calling Setup"
echo "================================"

# Check if the model file exists
if [ ! -f "Qwen3-4B-Function-Calling-Pro.gguf" ]; then
    echo "❌ Model file not found: Qwen3-4B-Function-Calling-Pro.gguf"
    echo "📥 Please download the model file first."
    echo "   You can download it from: https://huggingface.co/Manojb/qwen3-4b-toolcall-gguf-llamacpp-codex"
    exit 1
fi

# Check if Python is available
if ! command -v python3 &> /dev/null; then
    echo "❌ Python3 not found. Please install Python 3.8+ first."
    exit 1
fi

# Check if llama-cpp-python is installed; install it if missing
if ! python3 -c "import llama_cpp" 2>/dev/null; then
    echo "📦 Installing llama-cpp-python..."
    pip3 install llama-cpp-python
    if [ $? -ne 0 ]; then
        echo "❌ Failed to install llama-cpp-python"
        exit 1
    fi
    echo "✅ llama-cpp-python installed successfully"
fi

# Run the model interactively
run_model() {
    echo "🔄 Starting Qwen3-4B Tool Calling model..."
    echo "   Model: Qwen3-4B-Function-Calling-Pro.gguf"
    echo "   Context: 2048 tokens"
    echo "   Threads: 8"
    echo ""
    echo "💡 Usage examples:"
    echo "   - 'What's the weather in London?'"
    echo "   - 'Find me a hotel in Paris'"
    echo "   - 'Calculate 25 + 17'"
    echo "   - 'Book a flight from New York to Tokyo'"
    echo ""
    echo "Press Ctrl+C to exit"
    echo "================================"
    python3 quick_start.py
}

# Start the Codex-compatible server
run_server() {
    echo "🌐 Starting Codex-compatible server..."
    echo "   Server: http://localhost:8000"
    echo "   Model: Qwen3-4B-Function-Calling-Pro"
    echo ""
    echo "💡 Configure Codex with:"
    echo "   - Server URL: http://localhost:8000"
    echo "   - Model: Qwen3-4B-Function-Calling-Pro"
    echo "   - API Key: (not required)"
    echo ""
    echo "Press Ctrl+C to stop server"
    echo "================================"
    # Note: sampling parameters such as temperature are set per request,
    # not as server startup flags.
    python3 -m llama_cpp.server \
        --model Qwen3-4B-Function-Calling-Pro.gguf \
        --host 0.0.0.0 \
        --port 8000 \
        --n_ctx 2048 \
        --n_threads 8
}

# Show usage information
show_help() {
    echo "Usage: $0 [OPTION]"
    echo ""
    echo "Options:"
    echo "  run, r      Run the model interactively (default)"
    echo "  server, s   Start Codex-compatible server"
    echo "  help, h     Show this help message"
    echo ""
    echo "Examples:"
    echo "  $0          # Run interactively"
    echo "  $0 run      # Run interactively"
    echo "  $0 server   # Start server for Codex"
    echo "  $0 help     # Show this help"
}

# Main script logic
case "${1:-run}" in
    "run"|"r"|"")
        run_model
        ;;
    "server"|"s")
        run_server
        ;;
    "help"|"h"|"-h"|"--help")
        show_help
        ;;
    *)
        echo "❌ Unknown option: $1"
        echo ""
        show_help
        exit 1
        ;;
esac
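
# ---------------------------------------------------------------------------
# Quick smoke test for server mode (illustrative sketch, kept as comments so
# it never executes as part of this script). llama-cpp-python's server exposes
# OpenAI-compatible endpoints, so once `./run_model.sh server` is running you
# can send a tool-calling request like the one below. The get_weather function
# and its parameters are hypothetical examples, not part of this repository.
#
# curl http://localhost:8000/v1/chat/completions \
#   -H "Content-Type: application/json" \
#   -d '{
#         "model": "Qwen3-4B-Function-Calling-Pro",
#         "messages": [{"role": "user", "content": "What is the weather in London?"}],
#         "tools": [{
#           "type": "function",
#           "function": {
#             "name": "get_weather",
#             "description": "Get the current weather for a city",
#             "parameters": {
#               "type": "object",
#               "properties": {"city": {"type": "string"}},
#               "required": ["city"]
#             }
#           }
#         }]
#       }'
# ---------------------------------------------------------------------------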