Commit 050a9de: leaderboard
Files changed:
- .gitattributes  +35 -0
- .gitignore  +5 -0
- README.md  +12 -0
- app.py  +142 -0
- assets/leaderboard_small.jpg  +0 -0
- assets/leaderboard_small.png  +0 -0
- constants.py  +89 -0
- init.py  +93 -0
- requirements.txt  +61 -0
- utils_display.py  +40 -0
.gitattributes
ADDED
@@ -0,0 +1,35 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore
ADDED
@@ -0,0 +1,5 @@
+.env
+__pycache__/
+requested_models/
+repos/
+TODO
README.md
ADDED
@@ -0,0 +1,12 @@
+---
+title: Open Object Detection Leaderboard
+emoji: 🏆
+colorFrom: green
+colorTo: indigo
+sdk: gradio
+sdk_version: 3.38.0
+app_file: app.py
+pinned: false
+---
+
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py
ADDED
@@ -0,0 +1,142 @@
+import gradio as gr
+import pandas as pd
+import json
+from constants import BANNER, INTRODUCTION_TEXT, CITATION_TEXT, METRICS_TAB_TEXT, DIR_OUTPUT_REQUESTS
+from init import is_model_on_hub, upload_file, load_all_info_from_dataset_hub
+from utils_display import AutoEvalColumn, fields, make_clickable_model, styled_error, styled_message
+from datetime import datetime, timezone
+
+column_names = {"AP-IoU=0.50:0.95-area=all-maxDets=100": "AP",
+                "AP-IoU=0.50-area=all-maxDets=100": "AP@.50",
+                "AP-IoU=0.75-area=all-maxDets=100": "AP@.75",
+                "AP-IoU=0.50:0.95-area=small-maxDets=100": "AP-S",
+                "AP-IoU=0.50:0.95-area=medium-maxDets=100": "AP-M",
+                "AP-IoU=0.50:0.95-area=large-maxDets=100": "AP-L",
+                "AR-IoU=0.50:0.95-area=all-maxDets=1": "AR1",
+                "AR-IoU=0.50:0.95-area=all-maxDets=10": "AR10",
+                "AR-IoU=0.50:0.95-area=all-maxDets=100": "AR100",
+                "AR-IoU=0.50:0.95-area=small-maxDets=100": "AR-S",
+                "AR-IoU=0.50:0.95-area=medium-maxDets=100": "AR-M",
+                "AR-IoU=0.50:0.95-area=large-maxDets=100": "AR-L"}
+
+eval_queue_repo, requested_models, csv_results = load_all_info_from_dataset_hub()
+
+if not csv_results.exists():
+    raise Exception(f"CSV file {csv_results} does not exist locally")
+
+# Get csv with data and parse columns
+original_df = pd.read_csv(csv_results)
+
+# Formats the columns
+def formatter(x):
+    x = "{:.2%}".format(x)
+    while len(x) < 6:
+        x = f"0{x}"
+    return x
+
+for col in original_df.columns:
+    if col == "model":
+        original_df[col] = original_df[col].apply(lambda x: x.replace(x, make_clickable_model(x)))
+    else:
+        original_df[col] = original_df[col].apply(formatter)  # For % values
+        # original_df[col] = original_df[col].multiply(100).round(2)
+
+original_df.rename(columns=column_names, inplace=True)
+
+COLS = [c.name for c in fields(AutoEvalColumn)]
+TYPES = [c.type for c in fields(AutoEvalColumn)]
+
+
+def request_model(model_text, chbcoco2017):
+
+    # Determine the selected checkboxes
+    dataset_selection = []
+    if chbcoco2017:
+        dataset_selection.append("COCO validation 2017 dataset")
+
+    if len(dataset_selection) == 0:
+        return styled_error("You need to select at least one dataset")
+
+    base_model_on_hub, error_msg = is_model_on_hub(model_text)
+
+    if not base_model_on_hub:
+        return styled_error(f"Base model '{model_text}' {error_msg}")
+
+    # Construct the output dictionary
+    current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
+    required_datasets = ', '.join(dataset_selection)
+    eval_entry = {
+        "date": current_time,
+        "model": model_text,
+        "datasets_selected": required_datasets
+    }
+
+    # Prepare file path
+    DIR_OUTPUT_REQUESTS.mkdir(parents=True, exist_ok=True)
+
+    fn_datasets = '@ '.join(dataset_selection)
+    filename = model_text.replace("/", "@") + "@@" + fn_datasets
+    if filename in requested_models:
+        return styled_error(f"A request for this model '{model_text}' and dataset(s) was already made.")
+    try:
+        filename_ext = filename + ".txt"
+        out_filepath = DIR_OUTPUT_REQUESTS / filename_ext
+
+        # Write the results to a text file
+        with open(out_filepath, "w") as f:
+            f.write(json.dumps(eval_entry))
+
+        upload_file(filename, out_filepath)
+
+        # Include file in the list of uploaded files
+        requested_models.append(filename)
+
+        # Remove the local file
+        out_filepath.unlink()
+
+        return styled_message("🤗 Your request has been submitted and will be evaluated soon!</p>")
+    except Exception as e:
+        return styled_error(f"Error submitting request!")
+
+with gr.Blocks() as demo:
+    gr.HTML(BANNER, elem_id="banner")
+    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
+
+    with gr.Tabs(elem_classes="tab-buttons") as tabs:
+        with gr.TabItem("🏅 COCO val 2017", elem_id="od-benchmark-tab-table", id=0):
+            leaderboard_table = gr.components.Dataframe(
+                value=original_df,
+                datatype=TYPES,
+                max_rows=None,
+                elem_id="leaderboard-table",
+                interactive=False,
+                visible=True,
+            )
+
+        with gr.TabItem("📈 Metrics", elem_id="od-benchmark-tab-table", id=1):
+            gr.Markdown(METRICS_TAB_TEXT, elem_classes="markdown-text")
+
+        with gr.TabItem("✉️✨ Request a model here!", elem_id="od-benchmark-tab-table", id=2):
+            with gr.Column():
+                gr.Markdown("# ✉️✨ Request results for a new model here!", elem_classes="markdown-text")
+            with gr.Column():
+                gr.Markdown("Select a dataset:", elem_classes="markdown-text")
+            with gr.Column():
+                model_name_textbox = gr.Textbox(label="Model name (user_name/model_name)")
+                chb_coco2017 = gr.Checkbox(label="COCO validation 2017 dataset", visible=False, value=True, interactive=False)
+            with gr.Column():
+                mdw_submission_result = gr.Markdown()
+                btn_submitt = gr.Button(value="🚀 Request")
+                btn_submitt.click(request_model,
+                                  [model_name_textbox, chb_coco2017],
+                                  mdw_submission_result)
+
+    with gr.Row():
+        with gr.Accordion("📙 Citation", open=False):
+            gr.Textbox(
+                value=CITATION_TEXT, lines=7,
+                label="Copy the BibTeX snippet to cite this source",
+                elem_id="citation-button",
+            ).style(show_copy_button=True)
+
+demo.launch(debug=True)
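
Note: the request files written by `request_model` above encode the model id and selected datasets in the filename (slashes become `@`, `@@` separates the model from the dataset list) and store the metadata as JSON in the file body. As a rough illustration of that layout, the `decode_request` helper below is a hypothetical sketch and not part of this commit:

```python
import json
from pathlib import Path

def decode_request(path: Path):
    """Recover model id, datasets and metadata from a request file written by request_model()."""
    # Filename layout: <user>@<model>@@<dataset 1>@ <dataset 2>[...].txt
    model_part, datasets_part = path.stem.split("@@", maxsplit=1)
    model_id = model_part.replace("@", "/", 1)    # "user@model" -> "user/model"
    datasets = datasets_part.split("@ ")          # datasets were joined with "@ "
    entry = json.loads(path.read_text())          # {"date": ..., "model": ..., "datasets_selected": ...}
    return model_id, datasets, entry

# e.g. decode_request(Path("eval_requests/user@model@@COCO validation 2017 dataset.txt"))
```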
assets/leaderboard_small.jpg
ADDED
assets/leaderboard_small.png
ADDED
constants.py
ADDED
@@ -0,0 +1,89 @@
+
+from pathlib import Path
+
+# Directory where request by models are stored
+DIR_OUTPUT_REQUESTS = Path("requested_models")
+EVAL_REQUESTS_PATH = Path("eval_requests")
+
+##########################
+# Text definitions       #
+##########################
+
+banner_url = "https://huggingface.co/spaces/rafaelpadilla/object_detection_leaderboard/resolve/main/assets/leaderboard_small.png"
+BANNER = f'<div style="display: flex; justify-content: space-around;"><img src="{banner_url}" alt="Banner" style="width: 40vw; min-width: 300px; max-width: 600px;"> </div>'
+
+TITLE = "<html> <head> <style> h1 {text-align: center;} </style> </head> <body> <h1> 🤗 Open Object Detection Leaderboard </b> </body> </html>"
+
+INTRODUCTION_TEXT = "📐 The 🤗 Open Object Detection Leaderboard aims to track, rank and evaluate vision models \
+available in the hub designed to detect objects in images. \
+Anyone from the community can request a model to be evaluated and added to the leaderboard. \
+\nCheck the 📈 Metrics tab to understand how the models are evaluated. \
+\nIf you want results for a model that is not listed here, you can ✉️✨ request results for it."
+
+CITATION_TEXT = '''@misc{open-od-leaderboard,
+  author = {Rafael Padilla, Amy Roberts and the Hugging Face Team},
+  title = {Open Object Detection Leaderboard},
+  year = {2023},
+  publisher = {Hugging Face},
+  howpublished = "\\url{https://huggingface.co/spaces/rafaelpadilla/object_detection_leaderboard}"
+}
+'''
+
+METRICS_TAB_TEXT = '''
+🎯 Average Precision and Average Recall are popular metrics for evaluating the accuracy of object detectors by estimating the Precision-Recall relationship.
+
+Here you will find details about the object detection metrics reported in our leaderboard.
+
+# Metrics
+
+There are plenty of variations of these metrics, depending on the IoU threshold, the area of the object, and the number of detections per image. The most popular ones are:
+
+## Average Precision (AP)
+- **AP**: AP at IoU=.50:.05:.95
+- **AP@.50 (APIoU=.50)**: AP at IoU=.50 (similar to mAP PASCAL VOC metric)
+- **AP@.75 (APIoU=.75)**: AP at IoU=.75 (strict metric)
+
+## Average Precision Across Scales
+- **AP-S (APsmall)**: AP for small objects: area < 32².
+- **AP-M (APmedium)**: AP for medium objects: 32² < area < 96².
+- **AP-L (APlarge)**: AP for large objects: area > 96².
+
+## Average Recall (AR)
+- **AR1 (ARmax=1)**: AR given 1 detection per image.
+- **AR10 (ARmax=10)**: AR given 10 detections per image.
+- **AR100 (ARmax=100)**: AR given 100 detections per image.
+
+## Average Recall Across Scales
+- **AR-S (ARsmall)**: AR for small objects: area < 32².
+- **AR-M (ARmedium)**: AR for medium objects: 32² < area < 96².
+- **AR-L (ARlarge)**: AR for large objects: area > 96².
+
+## How to reproduce our results
+
+To compute these metrics, various tools employ different methods. For this leaderboard's evaluation, we utilize the COCO evaluation approach, which can be found in the [COCO evaluation toolkit](https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocotools/cocoeval.py).
+
+The 🤗 `Evaluate` metric used to measure the results is also accessible in the hub: [detection_metrics](https://huggingface.co/spaces/rafaelpadilla/detection_metrics)
+
+It is essential to note that slight differences may arise between the results presented here and those from other sources. These differences can be attributed to numerical approximations, variations in batch sizes, and other hyperparameters. To ensure a consistent evaluation, we recommend using a batch size of 1 and setting the confidence threshold to 0 when evaluating your model.
+
+## Benchmark datasets
+
+We understand that the object detection task can be quite diverse and the requirements can vary greatly across different domains such as autonomous driving, medical imaging, aerial imaging, etc.
+
+With this in mind, we are interested in knowing if there are specific domains or applications that you believe should be addressed by our benchmarks. Please, [join our discussion](https://huggingface.co/spaces/rafaelpadilla/object_detection_leaderboard/discussions/1) and give your suggestion.
+
+### COCO dataset
+
+The Microsoft Common Objects in Context (COCO) dataset is a highly regarded benchmark for object detection models due to its comprehensive set of 80 object categories, extensive volume of images with complex scenes, and high-quality, manually annotated labels.
+
+Moreover, its versatility in supporting multiple computer vision tasks, along with a standardized format and an active community, makes it a robust, challenging, and easily comparable benchmark.
+
+The benchmarking COCO validation 2017 dataset is available in the 🤗 hub: [coco2017](https://huggingface.co/datasets/rafaelpadilla/coco2017)
+
+## 📚 Useful Readings
+
+For further insight into the subject, you may find the following readings helpful:
+
+- [A Survey on Performance Metrics for Object-Detection Algorithms](https://www.researchgate.net/profile/Rafael-Padilla/publication/343194514_A_Survey_on_Performance_Metrics_for_Object-Detection_Algorithms/links/5f1b5a5e45851515ef478268/A-Survey-on-Performance-Metrics-for-Object-Detection-Algorithms.pdf), R Padilla, SL Netto, EAB Da Silva - *IWSSIP, 2020*
+- [A Comparative Analysis of Object Detection Metrics with a Companion Open-Source Toolkit](https://www.mdpi.com/2079-9292/10/3/279/pdf), R Padilla, WL Passos, TLB Dias, SL Netto, EAB Da Silva - *Journal Electronics, 2021*
+'''
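
Note: the METRICS_TAB_TEXT above points readers to pycocotools' COCOeval as the reference implementation. For orientation only (not part of this commit, and the annotation/detection file names are placeholders), the twelve leaderboard columns correspond to the values printed by `summarize()` on bounding-box predictions against the COCO val2017 ground truth:

```python
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

coco_gt = COCO("instances_val2017.json")                # placeholder: COCO val2017 ground-truth annotations
coco_dt = coco_gt.loadRes("my_model_detections.json")   # placeholder: model predictions in COCO results format

coco_eval = COCOeval(coco_gt, coco_dt, iouType="bbox")
coco_eval.evaluate()    # match detections to ground truth at IoU=.50:.05:.95
coco_eval.accumulate()  # build the precision/recall curves
coco_eval.summarize()   # prints AP, AP@.50, AP@.75, AP-S/M/L, AR1/10/100, AR-S/M/L

# coco_eval.stats holds the same 12 numbers in the order used by the leaderboard columns
```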
init.py
ADDED
@@ -0,0 +1,93 @@
+import os
+from constants import EVAL_REQUESTS_PATH
+from pathlib import Path
+from huggingface_hub import HfApi, Repository
+
+TOKEN_HUB = os.environ.get("TOKEN_HUB", None)
+QUEUE_REPO = os.environ.get("QUEUE_REPO", None)
+QUEUE_PATH = os.environ.get("QUEUE_PATH", None)
+
+hf_api = HfApi(
+    endpoint="https://huggingface.co",
+    token=TOKEN_HUB,
+)
+
+def load_all_info_from_dataset_hub():
+    eval_queue_repo = None
+    csv_results = None
+    requested_models = None
+
+    passed = True
+    if TOKEN_HUB is None:
+        passed = False
+    else:
+        print("Pulling evaluation requests and results.")
+
+        eval_queue_repo = Repository(
+            local_dir=QUEUE_PATH,
+            clone_from=QUEUE_REPO,
+            use_auth_token=TOKEN_HUB,
+            repo_type="dataset",
+        )
+        eval_queue_repo.git_pull()
+
+        # Local directory where dataset repo is cloned + folder with eval requests
+        directory = QUEUE_PATH / EVAL_REQUESTS_PATH
+        requested_models = get_all_requested_models(directory)
+        requested_models = [p.stem for p in requested_models]
+        # Local directory where dataset repo is cloned
+        csv_results = get_csv_with_results(QUEUE_PATH)
+        if csv_results is None:
+            passed = False
+    if not passed:
+        print("No HuggingFace token provided. Skipping evaluation requests and results.")
+
+    return eval_queue_repo, requested_models, csv_results
+
+
+def upload_file(requested_model_name, path_or_fileobj):
+    dest_repo_file = Path(EVAL_REQUESTS_PATH) / path_or_fileobj.name
+    dest_repo_file = str(dest_repo_file)
+    hf_api.upload_file(
+        path_or_fileobj=path_or_fileobj,
+        path_in_repo=str(dest_repo_file),
+        repo_id=QUEUE_REPO,
+        token=TOKEN_HUB,
+        repo_type="dataset",
+        commit_message=f"Add {requested_model_name} to eval queue")
+
+def get_all_requested_models(directory):
+    directory = Path(directory)
+    all_requested_models = list(directory.glob("*.txt"))
+    return all_requested_models
+
+def get_csv_with_results(directory):
+    directory = Path(directory)
+    all_csv_files = list(directory.glob("*.csv"))
+    latest = [f for f in all_csv_files if f.stem.endswith("latest")]
+    if len(latest) != 1:
+        return None
+    return latest[0]
+
+
+def is_model_on_hub(model_name, revision="main") -> bool:
+    try:
+        model_name = model_name.replace(" ", "")
+        author = model_name.split("/")[0]
+        model_id = model_name.split("/")[1]
+        if len(author) == 0 or len(model_id) == 0:
+            return False, "is not a valid model name. Please use the format `author/model_name`."
+    except Exception as e:
+        return False, "is not a valid model name. Please use the format `author/model_name`."
+
+    try:
+        models = list(hf_api.list_models(author=author, search=model_id))
+        matched = [model_name for m in models if m.modelId == model_name]
+        if len(matched) != 1:
+            return False, "was not found on the hub!"
+        else:
+            return True, None
+    except Exception as e:
+        print(f"Could not get the model from the hub.: {e}")
+        return False, "was not found on hub!"
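
Note: init.py reads its configuration from three environment variables (TOKEN_HUB, QUEUE_REPO, QUEUE_PATH), which on a Space would normally be set as secrets/variables. A minimal local sketch with placeholder values, not part of this commit and assuming the queue dataset contains an eval_requests/ folder plus a results CSV whose name ends in "latest":

```python
import os

# Placeholder values for local testing; the real Space would configure these as secrets.
os.environ["TOKEN_HUB"] = "hf_xxx"              # token with read/write access to the queue dataset
os.environ["QUEUE_REPO"] = "user/eval-queue"    # dataset repo holding eval_requests/ and the *latest.csv results
os.environ["QUEUE_PATH"] = "repos/queue"        # local clone directory (matches the git-ignored repos/)

# Import after setting the environment, since init.py reads the variables at import time.
from init import load_all_info_from_dataset_hub
eval_queue_repo, requested_models, csv_results = load_all_info_from_dataset_hub()
```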
requirements.txt
ADDED
@@ -0,0 +1,61 @@
+aiohttp==3.8.4
+aiosignal==1.3.1
+async-timeout==4.0.2
+attrs==23.1.0
+certifi==2023.7.22
+charset-normalizer==3.2.0
+cmake==3.26.4
+contourpy==1.1.0
+Cython==3.0.0
+datasets==2.13.1
+dill==0.3.6
+filelock==3.12.2
+fonttools==4.40.0
+frozenlist==1.4.0
+fsspec==2023.6.0
+huggingface-hub==0.16.4
+idna==3.4
+Jinja2==3.1.2
+kiwisolver==1.4.4
+lit==16.0.6
+MarkupSafe==2.1.3
+matplotlib==3.7.2
+mpmath==1.3.0
+multidict==6.0.4
+multiprocess==0.70.14
+networkx==3.1
+numpy==1.25.2
+nvidia-cublas-cu11==11.10.3.66
+nvidia-cuda-cupti-cu11==11.7.101
+nvidia-cuda-nvrtc-cu11==11.7.99
+nvidia-cuda-runtime-cu11==11.7.99
+nvidia-cudnn-cu11==8.5.0.96
+nvidia-cufft-cu11==10.9.0.58
+nvidia-curand-cu11==10.2.10.91
+nvidia-cusolver-cu11==11.4.0.1
+nvidia-cusparse-cu11==11.7.4.91
+nvidia-nccl-cu11==2.14.3
+nvidia-nvtx-cu11==11.7.91
+packaging==23.1
+pandas==2.0.3
+Pillow==10.0.0
+pyarrow==12.0.1
+python-dateutil==2.8.2
+pytz==2023.3
+PyYAML==6.0.1
+regex==2023.6.3
+requests==2.31.0
+responses==0.18.0
+safetensors==0.3.1
+six==1.16.0
+sympy==1.12
+tokenizers==0.13.3
+torch==2.0.1
+torchvision==0.15.2
+tqdm==4.65.0
+triton==2.0.0
+typing_extensions==4.7.1
+tzdata==2023.3
+urllib3==2.0.4
+xxhash==3.2.0
+yarl==1.9.2
utils_display.py
ADDED
@@ -0,0 +1,40 @@
+from dataclasses import dataclass
+
+# These classes are for user facing column names, to avoid having to change them
+# all around the code when a modif is needed
+@dataclass
+class ColumnContent:
+    name: str
+    type: str
+
+def fields(raw_class):
+    return [v for k, v in raw_class.__dict__.items() if k[:2] != "__" and k[-2:] != "__"]
+
+@dataclass(frozen=True)
+class AutoEvalColumn:  # Auto evals column
+    model = ColumnContent("Model", "markdown")
+    ap = ColumnContent("AP", "str")
+    ap50 = ColumnContent("AP@.50", "number")
+    ap75 = ColumnContent("AP@.75 ⬆️", "number")
+    ap_small = ColumnContent("APₛ", "number")
+    ap_medium = ColumnContent("APₘ", "number")
+    ap_large = ColumnContent("APₗ", "number")
+    ar_1 = ColumnContent("AR₁", "number")
+    ar_10 = ColumnContent("AR₁₀", "number")
+    ar_100 = ColumnContent("AR₁₀₀", "number")
+    ar_small = ColumnContent("ARₛ", "number")
+    ar_medium = ColumnContent("ARₘ", "number")
+    ar_large = ColumnContent("ARₗ", "number")
+
+def make_clickable_model(model_name):
+    link = f"https://huggingface.co/{model_name}"
+    return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
+
+def styled_error(error):
+    return f"<p style='color: red; font-size: 20px; text-align: center;'>{error}</p>"
+
+def styled_warning(warn):
+    return f"<p style='color: orange; font-size: 20px; text-align: center;'>{warn}</p>"
+
+def styled_message(message):
+    return f"<p style='color: green; font-size: 20px; text-align: center;'>{message}</p>"
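
Note: for context, this hypothetical snippet (not part of this commit) shows roughly how app.py consumes these helpers to build the Dataframe column names and types; the model id used is just an example:

```python
from utils_display import AutoEvalColumn, fields, make_clickable_model

# fields() returns the ColumnContent class attributes in definition order.
cols = [c.name for c in fields(AutoEvalColumn)]
types = [c.type for c in fields(AutoEvalColumn)]
print(cols[:4])   # ['Model', 'AP', 'AP@.50', 'AP@.75 ⬆️']
print(types[:4])  # ['markdown', 'str', 'number', 'number']

# make_clickable_model turns a repo id into the HTML link rendered in the "Model" column.
print(make_clickable_model("user/some-detector"))
```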