eustlb (HF Staff) committed
Commit 4f55f90 · 1 Parent(s): 5923f2b

delete useless

Files changed (1)
  1. fetch_ci_results.py +0 -120
fetch_ci_results.py DELETED
@@ -1,120 +0,0 @@
- import requests
- import yaml
- import os
- import re
- import asyncio
- import aiohttp
- import pandas as pd
- from tqdm import tqdm
-
- def get_audio_models():
-     url = "https://raw.githubusercontent.com/huggingface/transformers/main/docs/source/en/_toctree.yml"
-     response = requests.get(url)
-
-     if response.status_code != 200:
-         print("Failed to fetch the YAML file")
-         return []
-
-     toctree_content = yaml.safe_load(response.text)
-
-     for section in toctree_content:
-         if section.get('title') == 'API':
-             for subsection in section.get('sections', []):
-                 if subsection.get('title') == 'Models':
-                     for model_section in subsection.get('sections', []):
-                         if model_section.get('title') == 'Audio models':
-                             return [audio_model.get('local').split('/')[-1].lower().replace('-', '_') for audio_model in model_section.get('sections', []) if 'local' in audio_model]
-
-     return []
-
- def fetch_and_process_ci_results(job_id):
-     github_token = os.environ.get('GITHUB_TOKEN')
-     if not github_token:
-         raise ValueError("GitHub token not found in environment variables")
-
-     headers = {
-         "Authorization": f"token {github_token}",
-         "Accept": "application/vnd.github+json"
-     }
-
-     audio_models = get_audio_models()
-     non_tested_models = [
-         "xls_r",
-         "speech_to_text_2",
-         "mctct",
-         "xlsr_wav2vec2",
-         "mms"
-     ]
-
-     url = f"https://api.github.com/repos/huggingface/transformers/actions/runs/{job_id}/jobs"
-
-     audio_model_jobs = {audio_model: [] for audio_model in audio_models}
-
-     def process_jobs(jobs_data):
-         for job in jobs_data['jobs']:
-             if "Model CI" in job['name'] and "models" in job['name']:
-                 match = re.search(r'models/([^/)]+)', job['name'])
-                 if match:
-                     model_name = match.group(1).lower()
-                     if model_name in audio_model_jobs:
-                         audio_model_jobs[model_name].append(job['id'])
-
-     async def fetch_and_process_jobs(session, url):
-         async with session.get(url, headers=headers) as response:
-             jobs_data = await response.json()
-             process_jobs(jobs_data)
-             return response.links.get('next', {}).get('url')
-
-     async def fetch_all_jobs():
-         async with aiohttp.ClientSession() as session:
-             next_url = url
-             with tqdm(desc="Fetching jobs", unit="page") as pbar:
-                 while next_url:
-                     next_url = await fetch_and_process_jobs(session, next_url)
-                     pbar.update(1)
-
-     def parse_test_results(text):
-         pattern = r'=+ (?:(\d+) failed,?\s*)?(?:(\d+) passed,?\s*)?(?:(\d+) skipped,?\s*)?(?:\d+ warnings?\s*)?in \d+\.\d+s'
-         match = re.search(pattern, text)
-         if match:
-             failed = int(match.group(1)) if match.group(1) else 0
-             passed = int(match.group(2)) if match.group(2) else 0
-             skipped = int(match.group(3)) if match.group(3) else 0
-             return {'failed': failed, 'passed': passed, 'skipped': skipped}
-         raise Exception("Could not find test summary in logs")
-
-     def retrieve_job_logs(job_id, job_name):
-         url = f"https://api.github.com/repos/huggingface/transformers/actions/jobs/{job_id}"
-         response = requests.get(url, headers=headers)
-         logs_url = f"https://api.github.com/repos/huggingface/transformers/actions/jobs/{job_id}/logs"
-         logs_response = requests.get(logs_url, headers=headers)
-         logs = logs_response.text
-         test_summary = parse_test_results(logs)
-         test_summary["model"] = job_name
-         test_summary["conclusion"] = response.json()['conclusion']
-         return test_summary
-
-     # Fetch initial jobs and run asynchronous job fetching
-     response = requests.get(url, headers=headers)
-     jobs = response.json()
-     process_jobs(jobs)
-     asyncio.run(fetch_all_jobs())
-
-     # Retrieve job logs and process results
-     results = []
-     for job_name, job_ids in tqdm(audio_model_jobs.items()):
-         for job_id in job_ids:
-             result = retrieve_job_logs(job_id, job_name)
-             results.append(result)
-
-     # Process results into DataFrame and save to CSV
-     df = (pd.DataFrame(results)
-           .melt(id_vars=['model', 'conclusion'],
-                 value_vars=['failed', 'passed', 'skipped'],
-                 var_name='test_type',
-                 value_name='number_of_tests')
-           .groupby(['model', 'conclusion', 'test_type'])
-           .agg({'number_of_tests': 'sum'})
-           .reset_index())
-
-     df.to_csv('test_results_by_type.csv', index=False)
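
For reference, the deleted script exposed a single entry point, fetch_and_process_ci_results, which takes the numeric ID of a GitHub Actions workflow run on huggingface/transformers, paginates over that run's "Model CI" jobs for the audio models, parses each job's pytest summary, and writes test_results_by_type.csv. A minimal invocation sketch, assuming the file is still on disk, a GITHUB_TOKEN environment variable is set, and the run ID below is a placeholder rather than a real workflow run:

    # Hypothetical usage of the (now deleted) helper
    import os
    from fetch_ci_results import fetch_and_process_ci_results

    assert os.environ.get("GITHUB_TOKEN"), "export a GitHub token with Actions read access first"
    fetch_and_process_ci_results(1234567890)  # placeholder workflow run ID
    # -> writes test_results_by_type.csv in the working directory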