Spaces:

Jeans4you
/

product-image-update-port-1

Sleeping

product-image-update-port-1 / tests /test_quota_handling.py

GitHub Actions

Deploy to Hugging Face Space: product-image-update-port-1

7c4d825 about 2 months ago

6.21 kB

	#!/usr/bin/env python3
	# ----------------------------------------------------------------------
	# Test script for GPU quota handling
	# ----------------------------------------------------------------------
	import requests
	import json
	import time
	import sys

	# ----------------------------------------------------------------------
	# Configuration
	# ----------------------------------------------------------------------
	BASE_URL = "http://localhost:7860"
	TEST_IMAGE_URL = "https://cdn.shopify.com/s/files/1/0505/0928/3527/files/hugging_face_test_image_shirt_product_type.jpg"

	# ----------------------------------------------------------------------
	# Test Functions
	# ----------------------------------------------------------------------
	def test_quota_info():
	    """Query the quota info endpoint and print a summary of the response.

	    Sends a GET to ``{BASE_URL}/api/quota-info`` and prints the status code.
	    When the status is 200, the JSON body is printed section by section:
	    ``quota_management``, ``user_type_quotas``, and, when present,
	    ``last_quota_error`` and ``usage_stats.last_hour``.

	    Any other status code is reported through the status line only.
	    """
	    print("\n=== Testing /api/quota-info ===")

	    # A timeout keeps the script from hanging forever on a stalled service.
	    response = requests.get(f"{BASE_URL}/api/quota-info", timeout=10)

	    print(f"Status Code: {response.status_code}")

	    if response.status_code != 200:
	        return

	    data = response.json()
	    print(f"Quota Management: {data.get('quota_management')}")
	    print(f"User Type Quotas:")
	    # "or {}" tolerates an explicit null as well as a missing key.
	    for user_type, info in (data.get('user_type_quotas') or {}).items():
	        print(f" - {user_type}: {info.get('quota_seconds')}s ({info.get('description')})")

	    if 'last_quota_error' in data:
	        print(f"\nLast Quota Error:")
	        error_info = data['last_quota_error'] or {}
	        print(f" - Time ago: {error_info.get('time_ago_seconds')}s")
	        print(f" - Retry after: {error_info.get('retry_after')}s")
	        print(f" - Estimated recovery: {error_info.get('estimated_recovery')}s")

	    if 'usage_stats' in data:
	        print(f"\nUsage Stats (Last Hour):")
	        # The "last_hour" bucket may be absent or null; fall back to an empty
	        # mapping instead of raising KeyError/AttributeError.
	        stats = (data['usage_stats'] or {}).get('last_hour') or {}
	        print(f" - Total requests: {stats.get('total_requests')}")
	        print(f" - Successful: {stats.get('successful')}")
	        print(f" - Failed: {stats.get('failed')}")
	        # "or 0" guards the float format against a null value in the payload.
	        print(f" - GPU seconds used: {stats.get('total_gpu_seconds') or 0:.2f}s")
	        print(f" - Average duration: {stats.get('average_duration') or 0:.2f}s")


	def test_process_image():
	    """POST the test image to the processing endpoint and report the outcome.

	    Sends ``TEST_IMAGE_URL`` with the product type ``"Shirt"`` to
	    ``{BASE_URL}/api/rb_and_crop`` (180 second timeout) and prints a report
	    that depends on the status code:

	    * 429 - quota exceeded: ``Retry-After`` header plus error details from
	      the body.
	    * 503 - service unavailable: the ``detail`` field of the body.
	    * 200 - success: URL/status (and error, if any) of each processed image.
	    * anything else - the decoded JSON body, or the first 200 characters of
	      the raw text when the body is not JSON.
	    """
	    print("\n=== Testing Image Processing ===")

	    payload = {
	        "data": [
	            [{"url": TEST_IMAGE_URL}],
	            "Shirt",
	        ]
	    }

	    response = requests.post(
	        f"{BASE_URL}/api/rb_and_crop",
	        json=payload,
	        timeout=180,
	    )

	    print(f"Status Code: {response.status_code}")

	    # Check if we got a quota error
	    if response.status_code == 429:
	        print("Got expected 429 (Too Many Requests) status for quota exceeded")

	        # Check for Retry-After header
	        retry_after = response.headers.get('Retry-After')
	        if retry_after:
	            print(f"Retry-After header: {retry_after}s")
	        else:
	            print("WARNING: No Retry-After header found")

	        # Parse error response
	        data = _json_object_or_empty(response)
	        print(f"Error Type: {data.get('error_type')}")
	        # A missing/null message must not break the slice below.
	        message = str(data.get('error_message') or '')
	        print(f"Error Message: {message[:100]}...")

	        error_details = data.get('error_details') or {}
	        if 'retry_after' in error_details:
	            print(f"Retry After (from body): {error_details['retry_after']}s")

	        if 'quota_info' in error_details:
	            quota_info = error_details['quota_info'] or {}
	            print(f"\nQuota Info:")
	            print(f" - Message: {quota_info.get('message')}")
	            print(f" - Calculated retry: {quota_info.get('calculated_retry')}s")

	    elif response.status_code == 503:
	        print("Got 503 (Service Unavailable) - GPU warming up")
	        data = _json_object_or_empty(response)
	        print(f"Details: {data.get('detail')}")

	    elif response.status_code == 200:
	        print("Success! Image processed")
	        data = _json_object_or_empty(response)
	        for img in data.get('processed_images') or []:
	            print(f" - URL: {img.get('url')}")
	            print(f" - Status: {img.get('status')}")
	            if img.get('status') == 'error':
	                print(f" - Error: {img.get('error')}")

	    else:
	        print(f"Unexpected status code: {response.status_code}")
	        try:
	            print(f"Response: {response.json()}")
	        except ValueError:
	            # JSON decode errors are ValueError subclasses; anything else
	            # (e.g. KeyboardInterrupt) should propagate.
	            print(f"Response text: {response.text[:200]}")


	def _json_object_or_empty(response):
	    """Return the response body as a dict, or ``{}`` if it is not a JSON object."""
	    try:
	        body = response.json()
	    except ValueError:
	        return {}
	    return body if isinstance(body, dict) else {}


	def test_simulate_quota_error(attempts=3, settle_seconds=2, pause_seconds=5):
	    """Issue several processing requests in a row to try to exhaust the quota.

	    After each call to ``test_process_image`` the function sleeps
	    ``settle_seconds`` and then prints the quota status via
	    ``test_quota_info``. Between attempts (but not after the last one) it
	    sleeps ``pause_seconds``.

	    Args:
	        attempts: Number of processing requests to send. Defaults to 3.
	        settle_seconds: Delay between a request and the quota check.
	        pause_seconds: Delay between consecutive attempts.
	    """
	    print("\n=== Simulating Quota Exhaustion ===")
	    print("This will make multiple requests to exhaust GPU quota...")

	    for attempt in range(1, attempts + 1):
	        print(f"\nAttempt {attempt}:")
	        test_process_image()

	        # Check quota status after each attempt
	        time.sleep(settle_seconds)
	        test_quota_info()

	        # No point in waiting once the final attempt has been made.
	        if attempt < attempts:
	            print(f"\nWaiting {pause_seconds} seconds before next attempt...")
	            time.sleep(pause_seconds)


	# ----------------------------------------------------------------------
	# Main
	# ----------------------------------------------------------------------
	if __name__ == "__main__":
	    # Script entry point: verify the service answers on /health, then run
	    # either the quota-exhaustion scenario (first CLI argument "exhaust")
	    # or the two basic checks.
	    print("GPU Quota Handling Test")
	    print("=======================")

	    # Check if service is running. The timeout keeps a stalled service from
	    # blocking the script forever; only transport-level failures are reported
	    # as connection problems.
	    try:
	        response = requests.get(f"{BASE_URL}/health", timeout=10)
	    except requests.RequestException as e:
	        print(f"Cannot connect to service at {BASE_URL}: {e}")
	        print("Make sure the service is running!")
	        sys.exit(1)

	    if response.status_code != 200:
	        print("Service health check failed")
	        sys.exit(1)

	    try:
	        health = response.json()
	    except ValueError:
	        # The service answered, but not with JSON: that is a failed health
	        # check rather than a connection problem.
	        print("Service health check failed")
	        sys.exit(1)

	    print(f"Service is healthy")
	    print(f"Device: {health.get('device')}")
	    print(f"Models loaded: {health.get('models_loaded')}")
	    print(f"GPU available: {health.get('gpu_available')}")

	    # Run tests
	    if len(sys.argv) > 1 and sys.argv[1] == "exhaust":
	        test_simulate_quota_error()
	    else:
	        test_quota_info()
	        test_process_image()

	    print("\nTest completed!")