#!/usr/bin/env python3
"""
Quick start example for Qwen3-4B Tool Calling
"""

import json
import re

from llama_cpp import Llama


def extract_tool_calls(text):
    """Extract tool calls from a model response.

    Looks for JSON arrays of the form [{"name": ..., "arguments": ...}]
    and returns the parsed tool-call dicts.
    """
    tool_calls = []
    json_pattern = r'\[.*?\]'
    # re.DOTALL lets the pattern match arrays that span multiple lines
    matches = re.findall(json_pattern, text, re.DOTALL)

    for match in matches:
        try:
            parsed = json.loads(match)
            if isinstance(parsed, list):
                for item in parsed:
                    if isinstance(item, dict) and 'name' in item:
                        tool_calls.append(item)
        except json.JSONDecodeError:
            continue

    return tool_calls


def main():
    """Quick start demo"""
    print("šŸš€ Qwen3-4B Tool Calling - Quick Start")
    print("=" * 50)

    # Load the model. Sampling parameters such as temperature belong to the
    # generation call below, not to the Llama constructor.
    print("Loading model...")
    llm = Llama(
        model_path="Qwen3-4B-Function-Calling-Pro.gguf",
        n_ctx=1024,  # Smaller context for quick demo
        n_threads=4,
        verbose=False
    )
    print("āœ… Model loaded!")

    # Interactive demo
    print("\nšŸ’¬ Interactive Demo (type 'quit' to exit)")
    print("-" * 50)

    while True:
        try:
            user_input = input("\nYou: ").strip()

            if user_input.lower() in ['quit', 'exit', 'q']:
                break

            if not user_input:
                continue

            # Format the prompt with the ChatML template the model expects
            formatted_prompt = (
                f"<|im_start|>user\n{user_input}<|im_end|>\n"
                f"<|im_start|>assistant\n"
            )

            # Generate a response, stopping at the ChatML delimiters
            response = llm(
                formatted_prompt,
                max_tokens=200,
                stop=["<|im_end|>", "<|im_start|>"],
                temperature=0.7
            )

            response_text = response['choices'][0]['text']
            print(f"\nAssistant: {response_text}")

            # Check for tool calls
            tool_calls = extract_tool_calls(response_text)
            if tool_calls:
                print(f"\nšŸ”§ Tool Calls ({len(tool_calls)}):")
                for i, tool_call in enumerate(tool_calls, 1):
                    print(f"  {i}. {tool_call['name']}")
                    print(f"     Arguments: {tool_call.get('arguments', {})}")

        except KeyboardInterrupt:
            print("\n\nGoodbye! šŸ‘‹")
            break
        except Exception as e:
            print(f"Error: {e}")


if __name__ == "__main__":
    main()
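

# ---------------------------------------------------------------------------
# Optional: acting on extracted tool calls (illustrative sketch).
# The demo above only prints the tool calls it finds. A minimal way to act on
# them is to map tool names to plain Python callables and invoke them with the
# parsed arguments. `get_weather`, `TOOL_REGISTRY`, and `dispatch_tool_call`
# below are hypothetical examples, not part of the model or llama-cpp-python.
# ---------------------------------------------------------------------------

def get_weather(city: str) -> str:
    """Hypothetical tool implementation used only for this sketch."""
    return f"It is sunny in {city}."


TOOL_REGISTRY = {"get_weather": get_weather}


def dispatch_tool_call(tool_call: dict) -> str:
    """Look up the named tool in the registry and call it with its arguments."""
    func = TOOL_REGISTRY.get(tool_call.get('name'))
    if func is None:
        return f"Unknown tool: {tool_call.get('name')}"
    args = tool_call.get('arguments', {})
    if isinstance(args, str):
        # Some models emit arguments as a JSON-encoded string rather than a dict
        args = json.loads(args)
    return func(**args)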