# Page title: a centered <h1> (id "space-title") carrying the leaderboard name.
# NOTE(review): the literal previously began with a mis-decoded glyph (most
# likely an emoji). The intended character cannot be recovered from the
# garbled bytes, so it is omitted; re-add the emoji here if one is wanted.
TITLE = """<h1 align="center" id="space-title">Online Mind2Web Leaderboard</h1>"""

# Centered row of project links (Blog | Paper | Code | Data). The three "|"
# characters are visible separators between the anchors.
# NOTE(review): the Blog and Paper anchors still point at the "#" placeholder.
LINKS = """
<div align="center">
<a href="#">Blog</a> |
<a href="#">Paper</a> |
<a href="https://github.com/OSU-NLP-Group/Online-Mind2Web">Code</a> |
<a href="https://huggingface.co/datasets/osunlp/Online-Mind2Web">Data</a>
</div>
"""

# Markdown introduction: what the benchmark covers and how tasks are bucketed
# by difficulty. The step ranges use en dashes (U+2013).
INTRODUCTION_TEXT = """
Online Mind2Web is a benchmark designed to evaluate the real-world performance of web agents on live websites, featuring 300 tasks across 136 popular sites in diverse domains.
Based on the number of steps required by human annotators, tasks are divided into three difficulty levels: Easy (1–5 steps), Medium (6–10 steps), and Hard (11+ steps).
"""

# Markdown blurb for the leaderboard section: two leaderboards (automated and
# human evaluation) and how submissions are checked. Uses an em dash (U+2014).
LEADERBOARD_TEXT = """
### Leaderboard
We maintain two leaderboards—one for automated evaluation and another for human evaluation.
All submissions will be auto-evaluated internally, and if human evaluation results are provided, a subset will be selected for rigorous spot-check verification.
"""

# Markdown describing what a submission must contain.
# The JSON field names are written in lowercase so they agree with the
# `result.json` example shown in SUBMIT_INTRODUCTION (JSON keys are
# case-sensitive, so "Task"/"Task_id" would describe a different file).
SUBMISSION_TEXT = """
## Submissions
Participants are invited to submit your agent's trajectory to test. The submissions will be evaluated based on our auto-eval.

### Format of submission
Submissions must include a sequence of images (i.e., screenshots in the trajectory) and a result.json file for each task. The JSON file should contain the fields: "task", "task_id", and "action_history". You can refer to an example of the submission files.
"""

# Label text for the citation snippet.
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
# Citation snippet text. Kept as a raw string so backslashes are taken
# literally if a BibTeX-style entry is pasted in later.
# NOTE(review): the value is currently just the benchmark name, not a full
# citation entry -- presumably a placeholder; confirm.
CITATION_BUTTON_TEXT = r"""
Online Mind2Web"""

# Markdown instructions for preparing and sending a submission: the expected
# directory layout, the `result.json` schema, and where to send the files.
# NOTE(review): the heading previously began with a mis-decoded glyph whose
# original character cannot be recovered; it is omitted here.
# NOTE(review): the tree's box-drawing characters and the indentation inside
# both fenced blocks were rebuilt from garbled text -- the nesting follows the
# prose description (each task folder holds result.json and trajectory/), but
# the exact original glyphs/indent widths are a reconstruction.
SUBMIT_INTRODUCTION = """
## Please submit the trajectory file with the following format:
Each task is stored in a folder named after its `task_id`, containing:

- `trajectory/`: Stores screenshots of each step.
- `result.json`: Task metadata and action history.

**Structure:**
```
main_directory/
└── task_id/
    ├── result.json
    └── trajectory/
        ├── 0_screenshot.png
        ├── 1_screenshot.png
        └── ...
```

**`result.json` format:**
```json
{
    "task_id": 123,
    "task": "abc",
    "action_history": ["abc", "xyz", "..."]
}
```
Please send your agent's name, model family, and organization via email to xue.681@osu.edu, along with the trajectory directory attached.

We will run the auto-evaluation. If you have conducted your own human evaluation, please also attach your human eval results—we will spot-check these before adding them to the human-eval table.

"""

# Markdown heading for the benchmark-statistics section.
DATA_DATASET = """## More Statistics for Online Mind2Web Benchmark
"""


def format_error(msg: object) -> str:
    """Wrap *msg* in a centered, red, 20px HTML paragraph.

    Args:
        msg: Content to display. It is interpolated into the markup as-is
            (no HTML escaping), so any markup inside it is preserved.

    Returns:
        The ``<p>`` element as a string.
    """
    return f"<p style='color: red; font-size: 20px; text-align: center;'>{msg}</p>"


def format_warning(msg: object) -> str:
    """Wrap *msg* in a centered, orange, 20px HTML paragraph.

    Args:
        msg: Content to display. It is interpolated into the markup as-is
            (no HTML escaping), so any markup inside it is preserved.

    Returns:
        The ``<p>`` element as a string.
    """
    return f"<p style='color: orange; font-size: 20px; text-align: center;'>{msg}</p>"


def format_log(msg: object) -> str:
    """Wrap *msg* in a centered, green, 20px HTML paragraph.

    Args:
        msg: Content to display. It is interpolated into the markup as-is
            (no HTML escaping), so any markup inside it is preserved.

    Returns:
        The ``<p>`` element as a string.
    """
    return f"<p style='color: green; font-size: 20px; text-align: center;'>{msg}</p>"


def model_hyperlink(link: object, model_name: object) -> str:
    """Build an HTML anchor that opens *link* in a new tab, labelled *model_name*.

    Args:
        link: Value placed in the ``href`` attribute, inserted as-is (no
            URL or HTML escaping).
        model_name: Visible link text, inserted as-is (no HTML escaping).

    Returns:
        The ``<a>`` element as a string, styled with a dotted underline and
        the ``--link-text-color`` CSS variable.
    """
    # The literal is split only for line length; the pieces concatenate to
    # exactly the same markup, including the missing space after "underline;".
    return (
        f'<a target="_blank" href="{link}" '
        'style="color: var(--link-text-color); text-decoration: underline;'
        'text-decoration-style: dotted;">'
        f"{model_name}</a>"
    )