#!/usr/bin/env python3
# ----------------------------------------------------------------------
# Test script for GPU quota handling
# ----------------------------------------------------------------------
"""Manual smoke test for a service's GPU quota handling.

Run without arguments to query the quota info endpoint and submit one
image-processing request. Run with the single argument ``exhaust`` to
submit several requests in a row, printing quota info after each one.
"""
import json
import sys
import time

import requests

# ----------------------------------------------------------------------
# Configuration
# ----------------------------------------------------------------------
BASE_URL = "http://localhost:7860"
TEST_IMAGE_URL = "https://cdn.shopify.com/s/files/1/0505/0928/3527/files/hugging_face_test_image_shirt_product_type.jpg"

# Timeout (seconds) for the lightweight GET endpoints. ``requests`` applies
# no timeout by default, so without this a stalled service hangs the script.
STATUS_TIMEOUT = 30

# Timeout (seconds) for the image-processing POST request.
PROCESS_TIMEOUT = 180


# ----------------------------------------------------------------------
# Test Functions
# ----------------------------------------------------------------------
def test_quota_info():
    """Query ``/api/quota-info`` and print the fields it returns.

    Prints the status code and, on a 200 response, the quota management
    value, the per-user-type quotas, and (when present) the last quota
    error and the last-hour usage statistics. Nothing is returned.

    Raises:
        requests.RequestException: if the request fails or times out.
    """
    print("\n=== Testing /api/quota-info ===")

    response = requests.get(
        f"{BASE_URL}/api/quota-info", timeout=STATUS_TIMEOUT
    )
    print(f"Status Code: {response.status_code}")

    if response.status_code != 200:
        return

    data = response.json()
    print(f"Quota Management: {data.get('quota_management')}")
    print("User Type Quotas:")
    for user_type, info in data.get('user_type_quotas', {}).items():
        print(f" - {user_type}: {info.get('quota_seconds')}s ({info.get('description')})")

    if 'last_quota_error' in data:
        print("\nLast Quota Error:")
        error_info = data['last_quota_error']
        print(f" - Time ago: {error_info.get('time_ago_seconds')}s")
        print(f" - Retry after: {error_info.get('retry_after')}s")
        print(f" - Estimated recovery: {error_info.get('estimated_recovery')}s")

    if 'usage_stats' in data:
        print("\nUsage Stats (Last Hour):")
        # Tolerate a payload that has 'usage_stats' but no 'last_hour'
        # entry instead of failing with KeyError.
        stats = data['usage_stats'].get('last_hour', {})
        print(f" - Total requests: {stats.get('total_requests')}")
        print(f" - Successful: {stats.get('successful')}")
        print(f" - Failed: {stats.get('failed')}")
        print(f" - GPU seconds used: {stats.get('total_gpu_seconds', 0):.2f}s")
        print(f" - Average duration: {stats.get('average_duration', 0):.2f}s")


def _report_quota_exceeded(response):
    """Print the details of a 429 response: header and JSON error body."""
    print("Got expected 429 (Too Many Requests) status for quota exceeded")

    # Check for Retry-After header
    retry_after = response.headers.get('Retry-After')
    if retry_after:
        print(f"Retry-After header: {retry_after}s")
    else:
        print("WARNING: No Retry-After header found")

    # Parse error response
    data = response.json()
    print(f"Error Type: {data.get('error_type')}")
    # A missing or null message must not crash the slice below.
    message = str(data.get('error_message') or "")
    print(f"Error Message: {message[:100]}...")

    # `or {}` also covers an explicit null value for the key.
    error_details = data.get('error_details') or {}
    if 'retry_after' in error_details:
        print(f"Retry After (from body): {error_details['retry_after']}s")

    if 'quota_info' in error_details:
        quota_info = error_details['quota_info']
        print("\nQuota Info:")
        print(f" - Message: {quota_info.get('message')}")
        print(f" - Calculated retry: {quota_info.get('calculated_retry')}s")


def _report_success(response):
    """Print the per-image results of a 200 response."""
    print("Success! Image processed")
    data = response.json()
    for img in data.get('processed_images', []):
        print(f" - URL: {img.get('url')}")
        print(f" - Status: {img.get('status')}")
        if img.get('status') == 'error':
            print(f" - Error: {img.get('error')}")


def test_process_image():
    """Submit one image to ``/api/rb_and_crop`` and print the outcome.

    Handles four cases by status code: 429 (prints the ``Retry-After``
    header and the error body), 503 (prints the ``detail`` field), 200
    (prints each entry of ``processed_images``), and anything else (prints
    the JSON body, or the first 200 characters of the text if the body is
    not JSON). Nothing is returned.

    Raises:
        requests.RequestException: if the request fails or times out.
    """
    print("\n=== Testing Image Processing ===")

    payload = {
        "data": [
            [{"url": TEST_IMAGE_URL}],
            "Shirt",
        ]
    }

    response = requests.post(
        f"{BASE_URL}/api/rb_and_crop",
        json=payload,
        timeout=PROCESS_TIMEOUT,
    )
    print(f"Status Code: {response.status_code}")

    # Check if we got a quota error
    if response.status_code == 429:
        _report_quota_exceeded(response)
    elif response.status_code == 503:
        print("Got 503 (Service Unavailable) - GPU warming up")
        data = response.json()
        print(f"Details: {data.get('detail')}")
    elif response.status_code == 200:
        _report_success(response)
    else:
        print(f"Unexpected status code: {response.status_code}")
        try:
            print(f"Response: {response.json()}")
        except ValueError:
            # Body is not valid JSON; JSON decode errors subclass ValueError.
            print(f"Response text: {response.text[:200]}")


def test_simulate_quota_error(attempts=3):
    """Send several processing requests in a row to try to exhaust quota.

    After each request the function sleeps 2 seconds and prints the quota
    info; between attempts it sleeps a further 5 seconds.

    Args:
        attempts: Number of processing requests to send. Defaults to 3.
    """
    print("\n=== Simulating Quota Exhaustion ===")
    print("This will make multiple requests to exhaust GPU quota...")

    for i in range(attempts):
        print(f"\nAttempt {i + 1}:")
        test_process_image()

        # Check quota status after each attempt
        time.sleep(2)
        test_quota_info()

        # No need to wait after the final attempt.
        if i < attempts - 1:
            print("\nWaiting 5 seconds before next attempt...")
            time.sleep(5)


# ----------------------------------------------------------------------
# Main
# ----------------------------------------------------------------------
def main():
    """Check ``/health``, then run the tests selected on the command line.

    Exits with status 1 if the health check cannot be completed or does
    not return 200.
    """
    print("GPU Quota Handling Test")
    print("=======================")

    # Check if service is running
    try:
        response = requests.get(f"{BASE_URL}/health", timeout=STATUS_TIMEOUT)
        if response.status_code == 200:
            health = response.json()
            print("Service is healthy")
            print(f"Device: {health.get('device')}")
            print(f"Models loaded: {health.get('models_loaded')}")
            print(f"GPU available: {health.get('gpu_available')}")
        else:
            print("Service health check failed")
            sys.exit(1)
    except (requests.RequestException, ValueError) as e:
        # RequestException: connection/timeout problems.
        # ValueError: the health endpoint answered with a non-JSON body.
        print(f"Cannot connect to service at {BASE_URL}: {e}")
        print("Make sure the service is running!")
        sys.exit(1)

    # Run tests
    if len(sys.argv) > 1 and sys.argv[1] == "exhaust":
        test_simulate_quota_error()
    else:
        test_quota_info()
        test_process_image()

    print("\nTest completed!")


if __name__ == "__main__":
    main()