Spaces:

eustlb
/

transformers-audio-ci

Sleeping

App Files Files Community

eustlb HF Staff committed on Jun 10

Commit

4f55f90

1 Parent(s): 5923f2b

delete useless

Browse files

Files changed (1) hide show

fetch_ci_results.py +0 -120

fetch_ci_results.py DELETED Viewed

@@ -1,120 +0,0 @@
-import requests
-import yaml
-import os
-import re
-import asyncio
-import aiohttp
-import pandas as pd
-from tqdm import tqdm
-def get_audio_models():
-    url = "https://raw.githubusercontent.com/huggingface/transformers/main/docs/source/en/_toctree.yml"
-    response = requests.get(url)
-    if response.status_code != 200:
-        print("Failed to fetch the YAML file")
-        return []
-    toctree_content = yaml.safe_load(response.text)
-    for section in toctree_content:
-        if section.get('title') == 'API':
-            for subsection in section.get('sections', []):
-                if subsection.get('title') == 'Models':
-                    for model_section in subsection.get('sections', []):
-                        if model_section.get('title') == 'Audio models':
-                            return [audio_model.get('local').split('/')[-1].lower().replace('-', '_') for audio_model in model_section.get('sections', []) if 'local' in audio_model]
-    return []
-def fetch_and_process_ci_results(job_id):
-    github_token = os.environ.get('GITHUB_TOKEN')
-    if not github_token:
-        raise ValueError("GitHub token not found in environment variables")
-    headers = {
-        "Authorization": f"token {github_token}",
-        "Accept": "application/vnd.github+json"
-    }
-    audio_models = get_audio_models()
-    non_tested_models = [
-        "xls_r",
-        "speech_to_text_2",
-        "mctct",
-        "xlsr_wav2vec2",
-        "mms"
-    ]
-    url = f"https://api.github.com/repos/huggingface/transformers/actions/runs/{job_id}/jobs"
-    audio_model_jobs = {audio_model: [] for audio_model in audio_models}
-    def process_jobs(jobs_data):
-        for job in jobs_data['jobs']:
-            if "Model CI" in job['name'] and "models" in job['name']:
-                match = re.search(r'models/([^/)]+)', job['name'])
-                if match:
-                    model_name = match.group(1).lower()
-                    if model_name in audio_model_jobs:
-                        audio_model_jobs[model_name].append(job['id'])
-    async def fetch_and_process_jobs(session, url):
-        async with session.get(url, headers=headers) as response:
-            jobs_data = await response.json()
-            process_jobs(jobs_data)
-            return response.links.get('next', {}).get('url')
-    async def fetch_all_jobs():
-        async with aiohttp.ClientSession() as session:
-            next_url = url
-            with tqdm(desc="Fetching jobs", unit="page") as pbar:
-                while next_url:
-                    next_url = await fetch_and_process_jobs(session, next_url)
-                    pbar.update(1)
-    def parse_test_results(text):
-        pattern = r'=+ (?:(\d+) failed,?\s*)?(?:(\d+) passed,?\s*)?(?:(\d+) skipped,?\s*)?(?:\d+ warnings?\s*)?in \d+\.\d+s'
-        match = re.search(pattern, text)
-        if match:
-            failed = int(match.group(1)) if match.group(1) else 0
-            passed = int(match.group(2)) if match.group(2) else 0
-            skipped = int(match.group(3)) if match.group(3) else 0
-            return {'failed': failed, 'passed': passed, 'skipped': skipped}
-        raise Exception("Could not find test summary in logs")
-    def retrieve_job_logs(job_id, job_name):
-        url = f"https://api.github.com/repos/huggingface/transformers/actions/jobs/{job_id}"
-        response = requests.get(url, headers=headers)
-        logs_url = f"https://api.github.com/repos/huggingface/transformers/actions/jobs/{job_id}/logs"
-        logs_response = requests.get(logs_url, headers=headers)
-        logs = logs_response.text
-        test_summary = parse_test_results(logs)
-        test_summary["model"] = job_name
-        test_summary["conclusion"] = response.json()['conclusion']
-        return test_summary
-    # Fetch initial jobs and run asynchronous job fetching
-    response = requests.get(url, headers=headers)
-    jobs = response.json()
-    process_jobs(jobs)
-    asyncio.run(fetch_all_jobs())
-    # Retrieve job logs and process results
-    results = []
-    for job_name, job_ids in tqdm(audio_model_jobs.items()):
-        for job_id in job_ids:
-            result = retrieve_job_logs(job_id, job_name)
-            results.append(result)
-    # Process results into DataFrame and save to CSV
-    df = (pd.DataFrame(results)
-                .melt(id_vars=['model', 'conclusion'],
-                    value_vars=['failed', 'passed', 'skipped'],
-                    var_name='test_type',
-                    value_name='number_of_tests')
-                .groupby(['model', 'conclusion', 'test_type'])
-                .agg({'number_of_tests': 'sum'})
-                .reset_index())
-    df.to_csv('test_results_by_type.csv', index=False)