File size: 3,493 Bytes
2dba94f
 
 
 
 
 
 
 
 
 
e3f57bf
2dba94f
 
 
4dcba74
2dba94f
 
4dcba74
2dba94f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e045c4a
 
 
 
 
 
2dba94f
 
 
 
 
 
 
 
 
 
 
 
 
e3a5d5a
 
2dba94f
 
 
e045c4a
 
 
 
 
2dba94f
 
 
 
e045c4a
 
2dba94f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
import os

import gradio as gr
import pandas as pd
import numpy as np

from apscheduler.schedulers.background import BackgroundScheduler

# InfoStrings
from scorer import question_scorer
from content import format_error, format_warning, format_log, TITLE, LINKS, INTRODUCTION_TEXT, LEADERBOARD_TEXT, CITATION_BUTTON_LABEL, CITATION_BUTTON_TEXT, model_hyperlink, SUBMIT_INTRODUCTION

# Value of the TOKEN environment variable, or None when it is unset.
# NOTE(review): presumably a Hugging Face API token for the disabled HfApi
# client below; it is not referenced in the visible part of this file.
TOKEN = os.getenv("TOKEN")

# NOTE(review): looks like the owning Hub organisation name - confirm.
OWNER = "Online-Mind2Web"
# api = HfApi()

# Version tag; not referenced in the visible part of this file.
YEAR_VERSION = "2025"

# Debug flag; not referenced in the visible part of this file.
LOCAL_DEBUG = True

# Load and format the leaderboard results for display
def get_dataframe_from_results(eval_path):
    """Load a leaderboard CSV and prepare it for display.

    Rows are ordered by "Average SR", highest first, and the four score
    columns are rendered as strings with one decimal place.

    Args:
        eval_path: Path to a CSV file that contains at least the columns
            "Easy", "Medium", "Hard" and "Average SR".

    Returns:
        A pandas DataFrame sorted by descending "Average SR" whose score
        columns hold formatted strings rather than numbers.
    """
    score_columns = ('Easy', 'Medium', 'Hard', 'Average SR')
    # Sort while the values are still numeric; the formatting below turns
    # them into text.
    ranked = pd.read_csv(eval_path).sort_values(by=["Average SR"], ascending=False)
    for column in score_columns:
        ranked[column] = ranked[column].map(lambda score: f"{score:.1f}")
    return ranked

# Leaderboard snapshots read once at import time; they provide the initial
# values of the two Dataframe components built further down.
auto_eval_dataframe_test = get_dataframe_from_results(
    './auto_Mind2Web-Online - Leaderboard_data.csv'
)
human_eval_dataframe_test = get_dataframe_from_results(
    './human_Mind2Web-Online - Leaderboard_data.csv'
)

# Datatype passed to each Dataframe component, one entry per column:
# four "str", four "number", then one trailing "str".
# NOTE(review): get_dataframe_from_results renders the score columns as
# strings - confirm the "number" entries still describe the intended columns.
TYPES = ["str"] * 4 + ["number"] * 4 + ["str"]

def refresh():
    """Re-read both leaderboard CSV files from disk.

    The module-level DataFrames are left untouched; the freshly loaded
    frames are only returned to the caller.

    Returns:
        An ``(auto, human)`` tuple of DataFrames, in that order.
    """
    # Order matters: auto first, then human, matching the `outputs` list of
    # the Refresh button.
    return (
        get_dataframe_from_results('./auto_Mind2Web-Online - Leaderboard_data.csv'),
        get_dataframe_from_results('./human_Mind2Web-Online - Leaderboard_data.csv'),
    )

def upload_file(files):
    """Return the ``name`` attribute of every object in *files*.

    Args:
        files: Iterable of objects exposing a ``name`` attribute
            (presumably Gradio upload handles whose ``name`` is a file
            path - confirm against the caller).

    Returns:
        A list holding one ``name`` value per input object, in input order.
    """
    return [uploaded.name for uploaded in files]


# Top-level Gradio app. The custom CSS targets the element whose id is
# "human-leaderboard-table".
# NOTE(review): no component built below sets elem_id="human-leaderboard-table",
# so this rule appears to match nothing - confirm before removing it or wiring
# it to the human-evaluation table.
demo = gr.Blocks(css="""
#human-leaderboard-table {
    width: auto; /* allow auto sizing */
    min-width: calc(100% + 20px); /* extend a little beyond the content */
}
""")
with demo:
    # Page header: title, links and introduction text.
    gr.HTML(TITLE)
    gr.HTML(LINKS)
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    with gr.Row():
        # "\U0001F4D9" is the orange-book emoji. The label previously held the
        # mis-decoded bytes of that character; writing it as an escape keeps
        # the source pure ASCII, so it cannot be garbled by an encoding mismatch.
        with gr.Accordion("\U0001F4D9 Citation", open=False):
            citation_button = gr.Textbox(
                value=CITATION_BUTTON_TEXT,
                label=CITATION_BUTTON_LABEL,
                elem_id="citation-button",
                lines=10,
            )
    gr.Markdown(LEADERBOARD_TEXT, elem_classes="markdown-text")
    # gr.HTML(LEADERBOARD_HTML)

    # Read-only table seeded with the human-evaluation results loaded at import time.
    with gr.Tab("Human Evaluation", elem_id="human-tab", id=1):
        human_leaderboard_table_test = gr.components.Dataframe(
            value=human_eval_dataframe_test, datatype=TYPES, interactive=False,
            # column_widths=["15%", "15%", "15%", "15%", "10%", "10%", "10%", "10%", "15%"]
            # interactive=False,
            # height=700,
            # column_widths=[190, 140, 75, 75, 50, 50, 50, 50, 75],
            wrap=False
        )
    # Read-only table seeded with the auto-evaluation results loaded at import time.
    with gr.Tab("Auto Evaluation", elem_id="auto-tab", id=2):
        auto_leaderboard_table_test = gr.components.Dataframe(
            value=auto_eval_dataframe_test, datatype=TYPES, interactive=False,
            wrap=False
            # column_widths=["15%", "15%", "15%", "15%", "10%", "10%", "10%", "10%", "15%"]
        )

    with gr.Tab("Submission Guideline", elem_id="submit-tab", id=3):
        with gr.Row():
            gr.Markdown(SUBMIT_INTRODUCTION, elem_classes="markdown-text")

    # refresh() returns (auto, human); the outputs list must keep that order.
    refresh_button = gr.Button("Refresh")
    refresh_button.click(
        refresh,
        inputs=[],
        outputs=[
            auto_leaderboard_table_test,
            human_leaderboard_table_test,
        ],
    )

# Create and start the APScheduler background scheduler.
# NOTE(review): no jobs are registered on it anywhere in the visible code, so
# it appears to run idle - confirm whether a periodic task was meant to be added.
scheduler = BackgroundScheduler()
scheduler.start()
# Launch the Gradio app with debug mode enabled.
demo.launch(debug=True)