#!/bin/bash
#
# Informational test script for serving the converted Qwen3-Embedding-0.6B
# INT8 ONNX model with TEI (Text Embeddings Inference). Prints the model
# directory contents plus deployment / CPU-tuning guidance.
#
# Run from inside the converted model directory.
set -euo pipefail

echo "Testing Qwen3-Embedding-0.6B INT8 ONNX with TEI..."
echo "This model is quantized for faster CPU inference"

# The script is expected to be run from the model directory itself.
MODEL_PATH=$(pwd)
echo "Model path: $MODEL_PATH"

echo "Files in model directory:"
# Quote the expansion and use '--' so paths with spaces or a leading '-' work.
ls -la -- "$MODEL_PATH"

echo ""
echo "Expected performance improvement: 2-4x faster on CPU"
echo "Note: There may be a small accuracy drop (1-3%)"
echo ""
echo "To use this model with TEI:"
echo "1. Upload to HuggingFace Hub, or"
echo "2. Mount this directory in your TEI container"
echo "3. Update model-id in porter.yaml to point to this model"
echo ""
echo "For optimal CPU performance, set these environment variables:"
# NOTE: $(nproc) is expanded now, at script run time, so the printed export
# lines contain this machine's concrete core count (intentional — the user
# copies the finished lines into their environment).
echo "export OMP_NUM_THREADS=$(nproc) # Use all physical cores"
echo "export KMP_AFFINITY=granularity=fine,compact,1,0"
echo "export ORT_THREAD_POOL_SIZE=$(nproc)"