import pandas as pd
import gradio as gr
import requests
import yaml
import os
import re
import asyncio
import aiohttp
from tqdm import tqdm
from get_last_ci_run import get_last_ci_run_id
def get_audio_models():
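    """Fetch the transformers docs _toctree.yml and return the audio model names
    listed under API > Models > Audio models, normalized to lowercase snake_case."""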
url = "https://raw.githubusercontent.com/huggingface/transformers/main/docs/source/en/_toctree.yml"
response = requests.get(url)
if response.status_code != 200:
print("Failed to fetch the YAML file")
return []
toctree_content = yaml.safe_load(response.text)
for section in toctree_content:
if section.get('title') == 'API':
for subsection in section.get('sections', []):
if subsection.get('title') == 'Models':
for model_section in subsection.get('sections', []):
if model_section.get('title') == 'Audio models':
                            return [
                                audio_model.get('local').split('/')[-1].lower().replace('-', '_')
                                for audio_model in model_section.get('sections', [])
                                if 'local' in audio_model
                            ]
return []
def fetch_and_process_ci_results(job_id):
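    """Fetch every job of the given GitHub Actions run, parse the pytest summary
    from each audio-model job's logs, aggregate per-model pass/fail/skip counts,
    and write them to test_results_by_type.csv. Requires GITHUB_TOKEN."""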
github_token = os.environ.get('GITHUB_TOKEN')
if not github_token:
raise ValueError("GitHub token not found in environment variables")
headers = {
"Authorization": f"token {github_token}",
"Accept": "application/vnd.github+json"
}
audio_models = get_audio_models()
non_tested_models = [
"xls_r",
"speech_to_text_2",
"mctct",
"xlsr_wav2vec2",
"mms"
]
url = f"https://api.github.com/repos/huggingface/transformers/actions/runs/{job_id}/jobs"
audio_model_jobs = {audio_model: [] for audio_model in audio_models}
def process_jobs(jobs_data):
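        """Record the job IDs of "Model CI ... models/<name>" jobs whose model
        name matches one of the known audio models."""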
for job in jobs_data['jobs']:
if "Model CI" in job['name'] and "models" in job['name']:
match = re.search(r'models/([^/)]+)', job['name'])
if match:
model_name = match.group(1).lower()
if model_name in audio_model_jobs:
audio_model_jobs[model_name].append(job['id'])
async def fetch_and_process_jobs(session, url):
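        """Fetch one page of jobs, process it, and return the URL of the next
        page from the GitHub Link header (None when there are no more pages)."""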
async with session.get(url, headers=headers) as response:
jobs_data = await response.json()
process_jobs(jobs_data)
return response.links.get('next', {}).get('url')
async def fetch_all_jobs():
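        """Walk the paginated jobs endpoint until no 'next' link remains,
        reporting progress with a tqdm bar."""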
async with aiohttp.ClientSession() as session:
next_url = url
with tqdm(desc="Fetching jobs", unit="page") as pbar:
while next_url:
next_url = await fetch_and_process_jobs(session, next_url)
pbar.update(1)
def parse_test_results(text):
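        """Extract failed/passed/skipped counts from a pytest summary line such
        as '=== 2 failed, 40 passed, 3 skipped in 12.34s ==='."""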
pattern = r'=+ (?:(\d+) failed,?\s*)?(?:(\d+) passed,?\s*)?(?:(\d+) skipped,?\s*)?(?:\d+ warnings?\s*)?in \d+\.\d+s'
match = re.search(pattern, text)
if match:
failed = int(match.group(1)) if match.group(1) else 0
passed = int(match.group(2)) if match.group(2) else 0
skipped = int(match.group(3)) if match.group(3) else 0
return {'failed': failed, 'passed': passed, 'skipped': skipped}
        raise ValueError("Could not find test summary in logs")
def retrieve_job_logs(job_id, job_name):
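        """Download a job's logs, parse its test summary, and attach the model
        name and the job's conclusion from the GitHub API."""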
url = f"https://api.github.com/repos/huggingface/transformers/actions/jobs/{job_id}"
response = requests.get(url, headers=headers)
logs_url = f"https://api.github.com/repos/huggingface/transformers/actions/jobs/{job_id}/logs"
logs_response = requests.get(logs_url, headers=headers)
logs = logs_response.text
test_summary = parse_test_results(logs)
test_summary["model"] = job_name
test_summary["conclusion"] = response.json()['conclusion']
return test_summary
# Fetch initial jobs and run asynchronous job fetching
response = requests.get(url, headers=headers)
jobs = response.json()
process_jobs(jobs)
asyncio.run(fetch_all_jobs())
# Retrieve job logs and process results
results = []
for job_name, job_ids in tqdm(audio_model_jobs.items()):
for job_id in job_ids:
result = retrieve_job_logs(job_id, job_name)
results.append(result)
# Process results into DataFrame and save to CSV
df = (pd.DataFrame(results)
.melt(id_vars=['model', 'conclusion'],
value_vars=['failed', 'passed', 'skipped'],
var_name='test_type',
value_name='number_of_tests')
.groupby(['model', 'conclusion', 'test_type'])
.agg({'number_of_tests': 'sum'})
.reset_index())
df.to_csv('test_results_by_type.csv', index=False)
def load_and_process_data():
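    """Load test_results_by_type.csv and return the processed DataFrame, the
    model display order for the x-axis, and a dict of failure counts per
    failing model."""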
# Load the CSV file
model_test_results = pd.read_csv('test_results_by_type.csv')
# Get models with failed tests and their failure counts
failed_models_counts = model_test_results[
(model_test_results['test_type'] == 'failed') &
(model_test_results['number_of_tests'] > 0)
].groupby('model')['number_of_tests'].first().to_dict()
    # Add ❌ and failure count to model names that have failures, ✅ for passing models
    model_test_results['model'] = model_test_results.apply(
        lambda row: f"{row['model']} ❌ ({failed_models_counts[row['model']]})"
        if row['model'] in failed_models_counts
        else f"{row['model']} ✅",
        axis=1
    )
# Separate failed tests and other tests
failed_tests = model_test_results[model_test_results['test_type'] == 'failed'].sort_values('number_of_tests', ascending=False)
other_tests = model_test_results[model_test_results['test_type'] != 'failed']
# Concatenate the dataframes
model_test_results = pd.concat([failed_tests, other_tests])
# Sort models by success/failure and number of failed tests
model_order = model_test_results.sort_values(
by=['conclusion', 'test_type', 'number_of_tests'],
ascending=[True, False, False]
)['model'].unique().tolist()
return model_test_results, model_order, failed_models_counts
def create_bar_plot(model_test_results, model_order, failed_models_counts):
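    """Build a stacked gr.BarPlot of passed/skipped/failed test counts per
    model, ordered so that failing models come first."""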
return gr.BarPlot(
model_test_results,
x="model",
y="number_of_tests", # Base layer
color="test_type", # Color by pass/fail status
color_map={"passed": "#008550", "skipped": "#F0B702", "failed": "#8B1710"},
title="Test Results by Model",
x_title=f"Models ({len(failed_models_counts)} failing / {len(model_order)} total)",
y_title="Number of Tests",
height=600,
width=1000,
x_label_angle=45, # Rotate x-axis labels by 45 degrees
x_order=model_order # Set custom order of x-axis
)
# Create the Gradio interface
with gr.Blocks() as results_viz:
gr.Markdown("# Test Results by Model")
model_test_results, model_order, failed_models_counts = load_and_process_data()
test_results_plot = create_bar_plot(model_test_results, model_order, failed_models_counts)
with gr.Row():
refresh_btn = gr.Button(
value="Refresh CI Results (~2mn)",
variant="primary"
)
refresh_status = gr.Textbox()
def check_and_refresh():
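        """Compare the latest CI run ID against the one cached in ci_id.txt;
        if it changed, re-fetch the results and rebuild the plot."""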
        # For now get_last_ci_run_id may just return a hardcoded run ID
latest_ci_id = str(get_last_ci_run_id())
try:
with open("ci_id.txt", "r") as f:
current_ci_id = f.read().strip()
except FileNotFoundError:
current_ci_id = ""
if latest_ci_id == current_ci_id:
return "No new CI results available yet.", test_results_plot
else:
fetch_and_process_ci_results(latest_ci_id)
with open("ci_id.txt", "w") as f:
f.write(latest_ci_id)
# Reload and reprocess the data
new_model_test_results, new_model_order, new_failed_models_counts = load_and_process_data()
# Create a new BarPlot with the updated data
new_test_results_plot = create_bar_plot(new_model_test_results, new_model_order, new_failed_models_counts)
return "CI results refreshed successfully!", new_test_results_plot
refresh_btn.click(fn=check_and_refresh, outputs=[refresh_status, test_results_plot])
if __name__ == "__main__":
    results_viz.launch()