#!/bin/bash
# Qwen3-4B Tool Calling with llama-cpp-python
# This script sets up and runs the model for local inference
#
# Usage:
#   ./run_model.sh         (requires chmod +x)
#   bash ./run_model.sh    (no chmod needed)

echo "🚀 Qwen3-4B Tool Calling Setup"
echo "================================"

# Check if the model file exists
if [ ! -f "Qwen3-4B-Function-Calling-Pro.gguf" ]; then
    echo "❌ Model file not found: Qwen3-4B-Function-Calling-Pro.gguf"
    echo "📥 Please download the model file first."
    echo "   You can download it from: https://huggingface.co/Manojb/qwen3-4b-toolcall-gguf-llamacpp-codex"
    exit 1
fi

# Check if Python is available
if ! command -v python3 &> /dev/null; then
    echo "❌ Python3 not found. Please install Python 3.8+ first."
    exit 1
fi

# Check if llama-cpp-python is installed; install it if missing
if ! python3 -c "import llama_cpp" 2>/dev/null; then
    echo "📦 Installing llama-cpp-python..."
    pip3 install llama-cpp-python
    if [ $? -ne 0 ]; then
        echo "❌ Failed to install llama-cpp-python"
        exit 1
    fi
    echo "✅ llama-cpp-python installed successfully"
fi

# Run the model interactively
run_model() {
    echo "🔄 Starting Qwen3-4B Tool Calling model..."
    echo "   Model: Qwen3-4B-Function-Calling-Pro.gguf"
    echo "   Context: 2048 tokens"
    echo "   Threads: 8"
    echo ""
    echo "💡 Usage examples:"
    echo "   - 'What's the weather in London?'"
    echo "   - 'Find me a hotel in Paris'"
    echo "   - 'Calculate 25 + 17'"
    echo "   - 'Book a flight from New York to Tokyo'"
    echo ""
    echo "Press Ctrl+C to exit"
    echo "================================"
    python3 quick_start.py
}

# Start the Codex-compatible server
run_server() {
    echo "🌐 Starting Codex-compatible server..."
    echo "   Server: http://localhost:8000"
    echo "   Model: Qwen3-4B-Function-Calling-Pro"
    echo ""
    echo "💡 Configure Codex with:"
    echo "   - Server URL: http://localhost:8000"
    echo "   - Model: Qwen3-4B-Function-Calling-Pro"
    echo "   - API Key: (not required)"
    echo ""
    echo "Press Ctrl+C to stop server"
    echo "================================"
    # Note: sampling parameters such as temperature are set per request,
    # not as server startup flags.
    python3 -m llama_cpp.server \
        --model Qwen3-4B-Function-Calling-Pro.gguf \
        --host 0.0.0.0 \
        --port 8000 \
        --n_ctx 2048 \
        --n_threads 8
}

# Show usage information
show_help() {
    echo "Usage: $0 [OPTION]"
    echo ""
    echo "Options:"
    echo "  run, r      Run the model interactively (default)"
    echo "  server, s   Start Codex-compatible server"
    echo "  help, h     Show this help message"
    echo ""
    echo "Examples:"
    echo "  $0          # Run interactively"
    echo "  $0 run      # Run interactively"
    echo "  $0 server   # Start server for Codex"
    echo "  $0 help     # Show this help"
}

# Main script logic
case "${1:-run}" in
    "run"|"r"|"")
        run_model
        ;;
    "server"|"s")
        run_server
        ;;
    "help"|"h"|"-h"|"--help")
        show_help
        ;;
    *)
        echo "❌ Unknown option: $1"
        echo ""
        show_help
        exit 1
        ;;
esac
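
# ---------------------------------------------------------------------------
# Quick smoke test for server mode (illustrative sketch, kept as comments so
# it never executes as part of this script). llama-cpp-python's server exposes
# OpenAI-compatible endpoints, so once `./run_model.sh server` is running you
# can send a tool-calling request like the one below. The get_weather function
# and its parameters are hypothetical examples, not part of this repository.
#
# curl http://localhost:8000/v1/chat/completions \
#   -H "Content-Type: application/json" \
#   -d '{
#         "model": "Qwen3-4B-Function-Calling-Pro",
#         "messages": [{"role": "user", "content": "What is the weather in London?"}],
#         "tools": [{
#           "type": "function",
#           "function": {
#             "name": "get_weather",
#             "description": "Get the current weather for a city",
#             "parameters": {
#               "type": "object",
#               "properties": {"city": {"type": "string"}},
#               "required": ["city"]
#             }
#           }
#         }]
#       }'
# ---------------------------------------------------------------------------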