---
# Docker Compose definition for a single vLLM OpenAI-compatible server that
# serves the Qwen3-Reranker-4B model from a locally mounted ./models directory.
services:
  Qwen3-Reranker-4B:
    container_name: Qwen3-Reranker-4B
    # Never restart the container automatically. The value is quoted because
    # a bare `no` is resolved to boolean false by YAML 1.1 loaders, while
    # Compose expects the string "no".
    restart: "no"
    # Alternative image tags (disabled):
    # image: dengcao/vllm-openai:v0.9.2-dev
    #   Image built from the latest vLLM development version; tested and
    #   working on a host with an NVIDIA RTX 3060, safe to use.
    # image: dengcao/vllm-openai:v0.9.2rc2
    image: dengcao/vllm-openai:v0.9.2
    # Use the host's IPC namespace.
    # NOTE(review): presumably required for vLLM's shared-memory usage - confirm.
    ipc: host
    volumes:
      # Host ./models is mounted at /models; the --model path below relies on it.
      - ./models:/models
    # Arguments passed to the image's entrypoint.
    command:
      - '--model'
      - '/models/Qwen3-Reranker-4B'
      - '--served-model-name'
      - 'Qwen3-Reranker-4B'
      - '--gpu-memory-utilization'
      - '0.90'
      # JSON overrides for the model's Hugging Face config.
      # NOTE(review): these keys look like the settings vLLM needs to load the
      # original Qwen3 reranker checkpoint as a sequence-classification model;
      # verify against the vLLM version in the image.
      - '--hf_overrides'
      - '{"architectures": ["Qwen3ForSequenceClassification"],"classifier_from_token": ["no", "yes"],"is_original_qwen3_reranker": true}'
    ports:
      # HOST:CONTAINER - quoted so the mapping is always read as a string.
      - "8011:8000"
    deploy:
      resources:
        reservations:
          devices:
            # Reserve every NVIDIA GPU on the host for this container.
            - driver: nvidia
              count: all
              capabilities: [gpu]