Spaces:

TIGER-Lab
/

MMEB-Leaderboard

Running

App Files Files Community

v2 update test

#41

by MINGYISU - opened 2 days ago

base: refs/heads/main

←

from: refs/pr/41

Discussion Files changed

+49

-131

Files changed (5) hide show

.gitignore +2 -0
results.csv +0 -31
results.jsonl +30 -0
urls.csv +0 -26
utils.py +17 -74

.gitignore CHANGED Viewed

@@ -11,3 +11,5 @@ eval-results/
 eval-queue-bk/
 eval-results-bk/
 logs/

 eval-queue-bk/
 eval-results-bk/
 logs/
+.gitignore
+.gradio

results.csv DELETED Viewed

@@ -1,31 +0,0 @@
-Models,Model Size(B),Data Source,Overall,Classification,VQA,Retrieval,Grounding
-clip-vit-large-patch14,0.428,TIGER-Lab,37.8,42.8,9.1,53.0,51.8
-blip2-opt-2.7b,3.74,TIGER-Lab,25.2,27.0,4.2,33.9,47.0
-siglip-base-patch16-224,0.203,TIGER-Lab,34.8,40.3,8.4,31.6,59.5
-open_clip-ViT-L/14,0.428,TIGER-Lab,39.7,47.8,10.9,52.3,53.3
-UniIR (BLIP_FF),0.247,TIGER-Lab,42.8,42.1,15.0,60.1,62.2
-UniIR (CLIP_SF),0.428,TIGER-Lab,44.7,44.3,16.2,61.8,65.3
-e5-v,8.36,TIGER-Lab,13.3,21.8,4.9,11.5,19.0
-Magiclens,0.428,TIGER-Lab,27.8,38.8,8.3,35.4,26.0
-CLIP-FT,0.428,TIGER-Lab,45.4,55.2,19.7,53.2,62.2
-OpenCLIP-FT,0.428,TIGER-Lab,47.2,56.0,21.9,55.4,64.1
-VLM2Vec (Phi-3.5-V-FT),4.15,TIGER-Lab,55.9,52.8,50.3,57.8,72.3
-VLM2Vec (Phi-3.5-V-LoRA),4.15,TIGER-Lab,60.1,54.8,54.9,62.3,79.5
-VLM2Vec (LLaVA-1.6-LoRA-LowRes),7.57,TIGER-Lab,55.0,54.7,50.3,56.2,64.0
-VLM2Vec (LLaVA-1.6-LoRA-HighRes),7.57,TIGER-Lab,62.9,61.2,49.9,67.4,86.1
-MMRet-MLLM (LLaVA-1.6),7.57,Self-Reported,44.0,47.2,18.4,56.5,62.2
-MMRet-MLLM (FT),7.57,Self-Reported,64.1,56.0,57.4,69.9,83.6
-mmE5-mllama-11b-instruct,10.6,Self-Reported,69.8,67.6,62.6,71.0,89.6
-mmE5 (w/ 560K synthetic data),10.6,Self-Reported,58.6,60.6,55.7,54.7,72.4
-MM-Embed,8.18,Self-Reported,50.0,48.1,32.3,63.8,57.8
-gme-Qwen2-VL-2B-Instruct,2.21,Self-Reported,55.8,56.9,41.2,67.8,53.4
-VLM2Vec (Qwen2-VL-7B-LoRA-HighRes),8.29,TIGER-Lab,65.8,62.6,57.8,69.9,81.7
-VLM2Vec (Qwen2-VL-2B-LoRA-HighRes),2.21,TIGER-Lab,59.3,59.0,49.4,65.4,73.4
-LLaVE-7B,8.03,Self-Reported,70.3,65.7,65.4,70.9,91.9
-LLaVE-2B,1.95,Self-Reported,65.2,62.1,60.2,65.2,84.9
-LLaVE-0.5B,0.894,Self-Reported,59.1,57.4,50.3,59.8,82.9
-UniME(LLaVA-OneVision-7B-LoRA-Res336),8.03,Self-Reported,70.7,66.8,66.6,70.5,90.9
-UniME(LLaVA-1.6-7B-LoRA-LowRes),7.57,Self-Reported,66.6,60.6,52.9,67.9,85.1
-UniME(Phi-3.5-V-LoRA),4.2,Self-Reported,64.2,54.8,55.9,64.5,81.8
-QQMM-embed,8.297,Self-Reported,72.175,70.07,69.52,71.175,87.075
-B3,8.29,Self-Reported,72.0,70.0,66.5,74.1,84.6

results.jsonl ADDED Viewed

	@@ -0,0 +1,30 @@

+{"Models":"B3","Model Size(B)":8.29,"Data Source":"Self-Reported","Overall":72.0,"Classification":70.0,"VQA":66.5,"Retrieval":74.1,"Grounding":84.6,"URL":"https:\/\/huggingface.co\/raghavlite\/B3_Qwen2_7B"}
+{"Models":"CLIP-FT","Model Size(B)":0.428,"Data Source":"TIGER-Lab","Overall":45.4,"Classification":55.2,"VQA":19.7,"Retrieval":53.2,"Grounding":62.2,"URL":"https:\/\/doi.org\/10.48550\/arXiv.2103.00020"}
+{"Models":"LLaVE-0.5B","Model Size(B)":0.894,"Data Source":"Self-Reported","Overall":59.1,"Classification":57.4,"VQA":50.3,"Retrieval":59.8,"Grounding":82.9,"URL":"https:\/\/huggingface.co\/zhibinlan\/LLaVE-0.5B"}
+{"Models":"LLaVE-2B","Model Size(B)":1.95,"Data Source":"Self-Reported","Overall":65.2,"Classification":62.1,"VQA":60.2,"Retrieval":65.2,"Grounding":84.9,"URL":"https:\/\/huggingface.co\/zhibinlan\/LLaVE-2B"}
+{"Models":"LLaVE-7B","Model Size(B)":8.03,"Data Source":"Self-Reported","Overall":70.3,"Classification":65.7,"VQA":65.4,"Retrieval":70.9,"Grounding":91.9,"URL":"https:\/\/huggingface.co\/zhibinlan\/LLaVE-7B"}
+{"Models":"MM-Embed","Model Size(B)":8.18,"Data Source":"Self-Reported","Overall":50.0,"Classification":48.1,"VQA":32.3,"Retrieval":63.8,"Grounding":57.8,"URL":"https:\/\/huggingface.co\/nvidia\/MM-Embed"}
+{"Models":"MMRet-MLLM (FT)","Model Size(B)":7.57,"Data Source":"Self-Reported","Overall":64.1,"Classification":56.0,"VQA":57.4,"Retrieval":69.9,"Grounding":83.6,"URL":"https:\/\/huggingface.co\/JUNJIE99\/MMRet-large"}
+{"Models":"MMRet-MLLM (LLaVA-1.6)","Model Size(B)":7.57,"Data Source":"Self-Reported","Overall":44.0,"Classification":47.2,"VQA":18.4,"Retrieval":56.5,"Grounding":62.2,"URL":"https:\/\/huggingface.co\/JUNJIE99\/MMRet-large"}
+{"Models":"Magiclens","Model Size(B)":0.428,"Data Source":"TIGER-Lab","Overall":27.8,"Classification":38.8,"VQA":8.3,"Retrieval":35.4,"Grounding":26.0,"URL":"https:\/\/github.com\/google-deepmind\/magiclens"}
+{"Models":"OpenCLIP-FT","Model Size(B)":0.428,"Data Source":"TIGER-Lab","Overall":47.2,"Classification":56.0,"VQA":21.9,"Retrieval":55.4,"Grounding":64.1,"URL":"https:\/\/doi.org\/10.48550\/arXiv.2212.07143"}
+{"Models":"QQMM-embed","Model Size(B)":8.297,"Data Source":"Self-Reported","Overall":72.175,"Classification":70.07,"VQA":69.52,"Retrieval":71.175,"Grounding":87.075,"URL":"https:\/\/github.com\/QQ-MM\/QQMM-embed"}
+{"Models":"UniIR (BLIP_FF)","Model Size(B)":0.247,"Data Source":"TIGER-Lab","Overall":42.8,"Classification":42.1,"VQA":15.0,"Retrieval":60.1,"Grounding":62.2,"URL":"https:\/\/huggingface.co\/TIGER-Lab\/UniIR"}
+{"Models":"UniIR (CLIP_SF)","Model Size(B)":0.428,"Data Source":"TIGER-Lab","Overall":44.7,"Classification":44.3,"VQA":16.2,"Retrieval":61.8,"Grounding":65.3,"URL":"https:\/\/huggingface.co\/TIGER-Lab\/UniIR"}
+{"Models":"UniME(LLaVA-1.6-7B-LoRA-LowRes)","Model Size(B)":7.57,"Data Source":"Self-Reported","Overall":66.6,"Classification":60.6,"VQA":52.9,"Retrieval":67.9,"Grounding":85.1,"URL":"https:\/\/huggingface.co\/DeepGlint-AI\/UniME-LLaVA-1.6-7B"}
+{"Models":"UniME(LLaVA-OneVision-7B-LoRA-Res336)","Model Size(B)":8.03,"Data Source":"Self-Reported","Overall":70.7,"Classification":66.8,"VQA":66.6,"Retrieval":70.5,"Grounding":90.9,"URL":"https:\/\/huggingface.co\/DeepGlint-AI\/UniME-LLaVA-OneVision-7B"}
+{"Models":"UniME(Phi-3.5-V-LoRA)","Model Size(B)":4.2,"Data Source":"Self-Reported","Overall":64.2,"Classification":54.8,"VQA":55.9,"Retrieval":64.5,"Grounding":81.8,"URL":"https:\/\/huggingface.co\/DeepGlint-AI\/UniME-Phi3.5-V-4.2B"}
+{"Models":"VLM2Vec (LLaVA-1.6-LoRA-HighRes)","Model Size(B)":7.57,"Data Source":"TIGER-Lab","Overall":62.9,"Classification":61.2,"VQA":49.9,"Retrieval":67.4,"Grounding":86.1,"URL":"https://huggingface.co/TIGER-Lab/VLM2Vec-LLaVa-Next"}
+{"Models":"VLM2Vec (LLaVA-1.6-LoRA-LowRes)","Model Size(B)":7.57,"Data Source":"TIGER-Lab","Overall":55.0,"Classification":54.7,"VQA":50.3,"Retrieval":56.2,"Grounding":64.0,"URL":"https://huggingface.co/TIGER-Lab/VLM2Vec-LLaVa-Next"}
+{"Models":"VLM2Vec (Phi-3.5-V-FT)","Model Size(B)":4.15,"Data Source":"TIGER-Lab","Overall":55.9,"Classification":52.8,"VQA":50.3,"Retrieval":57.8,"Grounding":72.3,"URL":"https:\/\/huggingface.co\/TIGER-Lab\/VLM2Vec-Full"}
+{"Models":"VLM2Vec (Phi-3.5-V-LoRA)","Model Size(B)":4.15,"Data Source":"TIGER-Lab","Overall":60.1,"Classification":54.8,"VQA":54.9,"Retrieval":62.3,"Grounding":79.5,"URL":"https:\/\/huggingface.co\/TIGER-Lab\/VLM2Vec-Full"}
+{"Models":"VLM2Vec (Qwen2-VL-2B-LoRA-HighRes)","Model Size(B)":2.21,"Data Source":"TIGER-Lab","Overall":59.3,"Classification":59.0,"VQA":49.4,"Retrieval":65.4,"Grounding":73.4,"URL":"https:\/\/huggingface.co\/TIGER-Lab\/VLM2Vec-Qwen2VL-2B"}
+{"Models":"VLM2Vec (Qwen2-VL-7B-LoRA-HighRes)","Model Size(B)":8.29,"Data Source":"TIGER-Lab","Overall":65.8,"Classification":62.6,"VQA":57.8,"Retrieval":69.9,"Grounding":81.7,"URL":"https:\/\/huggingface.co\/TIGER-Lab\/VLM2Vec-Qwen2VL-7B"}
+{"Models":"blip2-opt-2.7b","Model Size(B)":3.74,"Data Source":"TIGER-Lab","Overall":25.2,"Classification":27.0,"VQA":4.2,"Retrieval":33.9,"Grounding":47.0,"URL":"https:\/\/huggingface.co\/Salesforce\/blip2-opt-2.7b"}
+{"Models":"clip-vit-large-patch14","Model Size(B)":0.428,"Data Source":"TIGER-Lab","Overall":37.8,"Classification":42.8,"VQA":9.1,"Retrieval":53.0,"Grounding":51.8,"URL":"https:\/\/huggingface.co\/openai\/clip-vit-large-patch14"}
+{"Models":"e5-v","Model Size(B)":8.36,"Data Source":"TIGER-Lab","Overall":13.3,"Classification":21.8,"VQA":4.9,"Retrieval":11.5,"Grounding":19.0,"URL":"https:\/\/huggingface.co\/royokong\/e5-v"}
+{"Models":"gme-Qwen2-VL-2B-Instruct","Model Size(B)":2.21,"Data Source":"Self-Reported","Overall":55.8,"Classification":56.9,"VQA":41.2,"Retrieval":67.8,"Grounding":53.4,"URL":"https:\/\/huggingface.co\/Alibaba-NLP\/gme-Qwen2-VL-2B-Instruct"}
+{"Models":"mmE5 (w\/ 560K synthetic data)","Model Size(B)":10.6,"Data Source":"Self-Reported","Overall":58.6,"Classification":60.6,"VQA":55.7,"Retrieval":54.7,"Grounding":72.4,"URL":"https:\/\/huggingface.co\/intfloat\/mmE5-mllama-11b-instruct"}
+{"Models":"mmE5-mllama-11b-instruct","Model Size(B)":10.6,"Data Source":"Self-Reported","Overall":69.8,"Classification":67.6,"VQA":62.6,"Retrieval":71.0,"Grounding":89.6,"URL":"https:\/\/huggingface.co\/intfloat\/mmE5-mllama-11b-instruct"}
+{"Models":"open_clip-ViT-L\/14","Model Size(B)":0.428,"Data Source":"TIGER-Lab","Overall":39.7,"Classification":47.8,"VQA":10.9,"Retrieval":52.3,"Grounding":53.3,"URL":"https:\/\/github.com\/mlfoundations\/open_clip"}
+{"Models":"siglip-base-patch16-224","Model Size(B)":0.203,"Data Source":"TIGER-Lab","Overall":34.8,"Classification":40.3,"VQA":8.4,"Retrieval":31.6,"Grounding":59.5,"URL":"https:\/\/huggingface.co\/google\/siglip-base-patch16-224"}

urls.csv DELETED Viewed

@@ -1,26 +0,0 @@
-Models,URL
-clip-vit-large-patch14,https://huggingface.co/openai/clip-vit-large-patch14
-blip2-opt-2.7b,https://huggingface.co/Salesforce/blip2-opt-2.7b
-siglip-base-patch16-224,https://huggingface.co/google/siglip-base-patch16-224
-open_clip-ViT-L/14,https://github.com/mlfoundations/open_clip
-e5-v,https://huggingface.co/royokong/e5-v
-Magiclens,https://github.com/google-deepmind/magiclens
-MMRet,https://huggingface.co/JUNJIE99/MMRet-large
-VLM2Vec-Phi-3.5-v,https://huggingface.co/TIGER-Lab/VLM2Vec-Full
-VLM2Vec,https://github.com/TIGER-AI-Lab/VLM2Vec
-VLM2Vec (Qwen2-VL-7B-LoRA-HighRes),https://huggingface.co/TIGER-Lab/VLM2Vec-Qwen2VL-7B
-VLM2Vec (Qwen2-VL-2B-LoRA-HighRes),https://huggingface.co/TIGER-Lab/VLM2Vec-Qwen2VL-2B
-UniIR,https://huggingface.co/TIGER-Lab/UniIR
-OpenCLIP-FT,https://doi.org/10.48550/arXiv.2212.07143
-CLIP-FT,https://doi.org/10.48550/arXiv.2103.00020
-mmE5,https://huggingface.co/intfloat/mmE5-mllama-11b-instruct
-gme-Qwen2-VL-2B-Instruct,https://huggingface.co/Alibaba-NLP/gme-Qwen2-VL-2B-Instruct
-MM-Embed,https://huggingface.co/nvidia/MM-Embed
-LLaVE-7B,https://huggingface.co/zhibinlan/LLaVE-7B
-LLaVE-2B,https://huggingface.co/zhibinlan/LLaVE-2B
-LLaVE-0.5B,https://huggingface.co/zhibinlan/LLaVE-0.5B
-UniME(LLaVA-OneVision-7B-LoRA-Res336),https://huggingface.co/DeepGlint-AI/UniME-LLaVA-OneVision-7B
-UniME(LLaVA-1.6-7B-LoRA-LowRes),https://huggingface.co/DeepGlint-AI/UniME-LLaVA-1.6-7B
-UniME(Phi-3.5-V-LoRA),https://huggingface.co/DeepGlint-AI/UniME-Phi3.5-V-4.2B
-QQMM-embed,https://github.com/QQ-MM/QQMM-embed
-B3,https://huggingface.co/raghavlite/B3_Qwen2_7B

utils.py CHANGED Viewed

@@ -25,7 +25,7 @@ DATA_TITLE_TYPE = ['number', 'markdown', 'str', 'markdown', 'number', 'number',
 SUBMISSION_NAME = "MMEB"
 SUBMISSION_URL = os.path.join("https://huggingface.co/spaces/TIGER-Lab/", SUBMISSION_NAME)
 FILE_NAME = "results.csv"
-CSV_DIR = "./results.csv"
 COLUMN_NAMES = MODEL_INFO
@@ -103,99 +103,42 @@ Please send us an email at m7su@uwaterloo.ca, attaching the JSON file. We will r
 def create_hyperlinked_names(df):
     def convert_url(url, model_name):
-        return f'<a href="{url}">{model_name}</a>'
-    def add_link_to_model_name(model_name):
-        if "VLM2Vec (Phi-3.5-V-" in model_name:
-            url = MODEL_URLS["VLM2Vec-Phi-3.5-v"]
-            return convert_url(url, model_name)
-        if "VLM2Vec (LLaVA-1.6-LoRA-" in model_name:
-            url = MODEL_URLS["VLM2Vec"]
-            return convert_url(url, model_name)
-        if "UniIR" in model_name:
-            url = MODEL_URLS["UniIR"]
-            return convert_url(url, model_name)
-        if "mmE5" in model_name:
-            url = MODEL_URLS["mmE5"]
-            return convert_url(url, model_name)
-        if "MMRet" in model_name:
-            url = MODEL_URLS["MMRet"]
-            return convert_url(url, model_name)
-        return convert_url(MODEL_URLS[model_name], model_name) if model_name in MODEL_URLS else model_name
     df = df.copy()
-    df['Models'] = df['Models'].apply(add_link_to_model_name)
     return df
-def fetch_data(url: str) -> pd.DataFrame:
-    # fetch the leaderboard data
-    if url is None:
         raise ValueError("URL Not Provided")
-    url = f"https://huggingface.co/spaces/TIGER-Lab/MMEB/resolve/main/{url}"
     print(f"Fetching data from {url}")
     response = requests.get(url)
     if response.status_code != 200:
         raise requests.HTTPError(f"Failed to fetch data: HTTP status code {response.status_code}")
-    return pd.read_csv(io.StringIO(response.text))
-def get_urls(csv: str='urls.csv') -> dict:
-    urls = fetch_data(csv)
-    return dict(zip(urls['Models'], urls['URL']))
-MODEL_URLS = get_urls()
-def get_df(csv="results.csv"):
-    df = fetch_data(csv)
-    df.to_csv(CSV_DIR, index=False) # update local file
     df['Model Size(B)'] = df['Model Size(B)'].apply(process_model_size)
     df = df.sort_values(by=['Overall'], ascending=False)
     df = create_hyperlinked_names(df)
     df['Rank'] = range(1, len(df) + 1)
     return df
-def add_new_eval(input_file):
-    if input_file is None:
-        return "Error! Empty file!"
-    # Load the input json file
-    upload_data = json.loads(input_file)
-    print("upload_data:\n", upload_data)
-    data_row = [f'{upload_data["Model"]}']
-    for col in ['Overall', 'Model Size(B)'] + TASKS:
-        if not col in upload_data.keys():
-            return f"Error! Missing {col} column!"
-        data_row += [upload_data[col]]
-    if 'URL' in upload_data.keys():
-        MODEL_URLS[upload_data['Model']] = upload_data['URL']
-    print("data_row:\n", data_row)
-    submission_repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL,
-                                 use_auth_token=HF_TOKEN, repo_type="space")
-    submission_repo.git_pull()
-    # Track submitted models
-    already_submitted = []
-    with open(CSV_DIR, mode='r') as file:
-        reader = csv.reader(file, delimiter=',')
-        for row in reader:
-            already_submitted.append(row[0])
-    # if not in the existing models list, add it to the csv file
-    if data_row[0] not in already_submitted:
-        with open(CSV_DIR, mode='a', newline='') as file:
-            writer = csv.writer(file)
-            writer.writerow(data_row)
-        submission_repo.push_to_hub()
-        print('Submission Successful')
-    else:
-        print('The model already exists in the leaderboard!')
 def refresh_data():
     df = get_df()
-    MODEL_URLS = get_urls()
     return df[COLUMN_NAMES]
 def search_and_filter_models(df, query, min_size, max_size):
     filtered_df = df.copy()

 SUBMISSION_NAME = "MMEB"
 SUBMISSION_URL = os.path.join("https://huggingface.co/spaces/TIGER-Lab/", SUBMISSION_NAME)
 FILE_NAME = "results.csv"
+CSV_DIR = "results.csv"
 COLUMN_NAMES = MODEL_INFO
 def create_hyperlinked_names(df):
     def convert_url(url, model_name):
+        return f'<a href="{url}">{model_name}</a>' if url is not None else model_name
+    def add_link_to_model_name(row):
+        row['Models'] = convert_url(row['URL'], row['Models'])
+        return row
     df = df.copy()
+    df = df.apply(add_link_to_model_name, axis=1)
     return df
+def fetch_data(file: str) -> pd.DataFrame:
+    # fetch the leaderboard data from remote
+    if file is None:
         raise ValueError("URL Not Provided")
+    url = f"https://huggingface.co/spaces/TIGER-Lab/MMEB/resolve/main/{file}"
     print(f"Fetching data from {url}")
     response = requests.get(url)
     if response.status_code != 200:
         raise requests.HTTPError(f"Failed to fetch data: HTTP status code {response.status_code}")
+    return pd.read_json(io.StringIO(response.text), orient='records', lines=True)
+def get_df(file="results.jsonl"):
+    df = fetch_data(file)
+    print(df.columns)
+    print('URL' in df.columns)
+    print(df)
     df['Model Size(B)'] = df['Model Size(B)'].apply(process_model_size)
     df = df.sort_values(by=['Overall'], ascending=False)
     df = create_hyperlinked_names(df)
     df['Rank'] = range(1, len(df) + 1)
     return df
 def refresh_data():
     df = get_df()
     return df[COLUMN_NAMES]
 def search_and_filter_models(df, query, min_size, max_size):
     filtered_df = df.copy()