# Source: Hugging Face Space fair-forward/evals-for-every-language (status: Running)
# File size: 2,249 Bytes
# Revision: 7c06aef

# Base image: the slim variant of the Python 3.12 image.
# NOTE(review): the tag is not pinned to a digest, so a rebuild may pick up a
# newer 3.12.x / OS patch level — confirm that is acceptable for this job.
FROM python:3.12-slim

# Set working directory; later relative paths (COPY destinations, .venv/) resolve under /app
WORKDIR /app

# Install system dependencies.
# --no-install-recommends keeps optional packages out of the image, so
# ca-certificates is listed explicitly instead of relying on it being present
# implicitly (curl needs it for HTTPS). The apt package lists are deleted in
# the same RUN so they never end up in a layer.
RUN apt-get update && apt-get install -y --no-install-recommends \
    ca-certificates \
    curl \
    && rm -rf /var/lib/apt/lists/*

# Copy only the dependency manifests first so this layer stays cached until
# pyproject.toml or uv.lock changes.
COPY pyproject.toml uv.lock ./
# Install uv, then create the project environment from the lockfile.
#   --no-cache-dir : keep pip's download cache out of the image layer
#   --frozen       : use uv.lock as-is instead of re-resolving/updating it
#   --no-cache     : keep uv's package cache out of the image layer
# NOTE(review): uv itself is not version-pinned; consider pinning it for
# reproducible builds.
RUN pip install --no-cache-dir uv && uv sync --frozen --no-cache

# Bring the rest of the build context into the working directory
COPY . ./

# Smoke-test the environment: the build fails here if any listed module cannot
# be imported from the project virtualenv. The continuation lines are joined
# back into a single `python -c` string before the shell sees them.
RUN .venv/bin/python -c "import \
pandas, datasets, evaluate, fastapi, uvicorn, \
google.cloud.storage, google.cloud.translate, \
dotenv, elevenlabs, huggingface_hub, joblib, language_data, \
openai, requests, scipy, aiolimiter, sentencepiece, \
langcodes, rich, tqdm; \
print('✅ All dependencies verified')"

# Conservative default limits, declared together in one ENV instruction.
# The startup script echoes all three at launch; presumably the evaluation
# code reads them as well — verify against evals/main_gcs.py.
ENV N_SENTENCES=20 \
    MAX_LANGUAGES=150 \
    COST_LIMIT_USD=20

# Generate /app/start.sh, the container's startup script. At runtime it:
#   1. serves an EMPTY temporary directory over HTTP on $PORT (default 8080),
#      so the platform's port check passes without exposing the files in /app;
#   2. runs evals/main_gcs.py from the project virtualenv in the background;
#   3. runs a watchdog that sends SIGTERM to the evaluation, by PID, once it
#      has been running for 24 hours (no pkill / process-name matching needed);
#   4. exits with the evaluation's own exit status so failures are not
#      reported as success.
# printf '%s\n' writes one script line per argument, so the result does not
# depend on the build shell's echo expanding "\n". Single quotes keep every
# $VAR literal here; they are expanded when the script runs.
RUN printf '%s\n' \
    '#!/bin/bash' \
    '' \
    '# Force immediate log flushing for Cloud Run visibility' \
    'export PYTHONUNBUFFERED=1' \
    'export PYTHONIOENCODING=utf-8' \
    '' \
    'echo "🚀 Starting AI Language Evaluation..."' \
    'echo "📊 Configuration: $N_SENTENCES sentences, $MAX_LANGUAGES languages"' \
    'echo "💰 Cost limit: $COST_LIMIT_USD USD"' \
    'echo "🛡️  Cost protection enabled"' \
    'echo "🔧 Logging: Unbuffered Python output enabled"' \
    '' \
    '# Start a simple HTTP server to satisfy Cloud Run requirements.' \
    '# It serves an empty directory so nothing under /app is downloadable.' \
    'HEALTH_DIR=$(mktemp -d)' \
    'python -m http.server "${PORT:-8080}" --directory "$HEALTH_DIR" &' \
    'HTTP_SERVER_PID=$!' \
    '' \
    '# Run the evaluation in the background so the watchdog can signal it by PID' \
    'cd /app || exit 1' \
    '.venv/bin/python -u evals/main_gcs.py &' \
    'EVAL_PID=$!' \
    '' \
    '# Runtime watchdog: stop the evaluation once it has run for 24 hours' \
    '(' \
    '    start_time=$(date +%s)' \
    '    while kill -0 "$EVAL_PID" 2>/dev/null; do' \
    '        current_time=$(date +%s)' \
    '        elapsed_hours=$(( (current_time - start_time) / 3600 ))' \
    '        if [ "$elapsed_hours" -ge 24 ]; then' \
    '            echo "⚠️  MAX RUNTIME REACHED! Stopping evaluation..."' \
    '            kill "$EVAL_PID"' \
    '            break' \
    '        fi' \
    '        sleep 300  # Check every 5 minutes' \
    '    done' \
    ') &' \
    'WATCHDOG_PID=$!' \
    '' \
    '# Block until the evaluation ends and remember how it ended' \
    'wait "$EVAL_PID"' \
    'EVAL_STATUS=$?' \
    '' \
    '# Stop the helper processes' \
    'kill "$HTTP_SERVER_PID" "$WATCHDOG_PID" 2>/dev/null' \
    '' \
    'if [ "$EVAL_STATUS" -eq 0 ]; then' \
    '    echo "✅ Evaluation completed!"' \
    'else' \
    '    echo "❌ Evaluation failed with exit status $EVAL_STATUS"' \
    'fi' \
    'exit "$EVAL_STATUS"' \
    > /app/start.sh \
    && chmod +x /app/start.sh

# Document the port used by the startup script's HTTP server (for Cloud Run requirements).
# EXPOSE is metadata only; it does not publish the port by itself.
EXPOSE 8080

# Default command: run the generated startup script (exec form, no wrapping shell).
# The 24-hour runtime cap lives inside start.sh; no resource limits are set here.
# NOTE(review): no USER instruction is visible in this file, so the container
# presumably runs as root — consider switching to a non-root user.
CMD ["/app/start.sh"]