import pandas as pd
import numpy as np
from matplotlib.colors import LinearSegmentedColormap

PAGE_MARKDOWN = """ """

PAGE_INFO = """[![Dataset on HF](https://huggingface.co/datasets/huggingface/badges/resolve/main/dataset-on-hf-lg.svg)](https://huggingface.co/datasets/RMT-team/babilong) | [GitHub](https://github.com/booydar/babilong) | [Paper](https://arxiv.org/abs/2406.10149) | [HF Dataset](https://huggingface.co/datasets/RMT-team/babilong) | [HF Dataset 1k samples per task](https://huggingface.co/datasets/RMT-team/babilong-1k-samples) |"""

# Context-length columns expected in the results tables, shortest to longest.
LENGTHS = ['0k', '1k', '2k', '4k', '8k', '16k', '32k', '64k', '128k', '512k', '1M', '2M']
# Prefixes of LENGTHS used for the "<=32k" and "<=128k" average columns.
LENGTHS_32k = ['0k', '1k', '2k', '4k', '8k', '16k', '32k']
LENGTHS_128k = ['0k', '1k', '2k', '4k', '8k', '16k', '32k', '64k', '128k']


def load_results(
    old_results_path="data/leaderboard-v0_results.csv",
    new_results_path="babilong/babilong_results/all_results.csv",
):
    """
    Load, merge and rank the v0 and current leaderboard results.

    Rows of the old table whose normalized model name also appears in the
    new table are dropped, so the new results win on duplicates. Scores equal
    to -1 are treated as missing. The returned frame gains four columns:
    ``<=32k`` and ``<=128k`` (row means over the corresponding length
    columns, ignoring missing values), ``max_eval_length_idx`` (index into
    ``LENGTHS`` of the longest evaluated length, -1 if none) and
    ``max_eval_length`` (the matching label, NaN if none).

    Parameters
    ----------
    old_results_path : str
        CSV file with the v0 leaderboard results.
    new_results_path : str
        CSV file with the current results.

    Returns
    -------
    pandas.DataFrame
        Merged results sorted by longest evaluated length, then by the
        ``<=128k`` average, both descending.
    """
    old_results = pd.read_csv(old_results_path)
    new_results = pd.read_csv(new_results_path)

    def normalize_model_name(name):
        # Compare models by the last path component, case-insensitively,
        # so "org/Model" and "model" are treated as the same model.
        if '/' in name:
            name = name.split('/')[-1]
        return name.lower()

    old_results['normalized_name'] = old_results['model_name'].apply(normalize_model_name)
    new_results['normalized_name'] = new_results['model_name'].apply(normalize_model_name)

    # Drop v0 entries that have been re-evaluated in the new results.
    duplicate_models = set(old_results['normalized_name']).intersection(
        set(new_results['normalized_name'])
    )
    old_results_filtered = old_results[~old_results['normalized_name'].isin(duplicate_models)]

    res = pd.concat([old_results_filtered, new_results])
    res.drop('normalized_name', axis=1, inplace=True)
    # -1 marks a missing score in the CSV files.
    res.replace(-1, np.nan, inplace=True)

    res['<=32k'] = res[LENGTHS_32k].mean(axis=1)
    res['<=128k'] = res[LENGTHS_128k].mean(axis=1)

    # Index of the longest length with a non-NaN score for each model;
    # -1 when the model has no scores at all.
    res['max_eval_length_idx'] = res.apply(
        lambda row: max(
            (idx for idx, col in enumerate(LENGTHS) if not pd.isna(row[col])),
            default=-1,
        ),
        axis=1,
    )
    # Guard the -1 sentinel: LENGTHS[-1] would otherwise mislabel a model
    # with no evaluated lengths as having been evaluated at '2M'.
    res['max_eval_length'] = res['max_eval_length_idx'].apply(
        lambda idx: LENGTHS[idx] if idx >= 0 else np.nan
    )

    # Sort first by max length (descending) and then by average score (descending)
    res.sort_values(['max_eval_length_idx', '<=128k'], ascending=[False, False], inplace=True)

    return res


# from pandas/io/formats/style.py
def relative_luminance(rgba) -> float:
    """
    Calculate relative luminance of a color.

    The calculation adheres to the W3C standards
    (https://www.w3.org/WAI/GL/wiki/Relative_luminance)

    Parameters
    ----------
    rgba : rgb or rgba tuple
        Color channels as floats in the range 0 to 1; only the first three
        entries are used.

    Returns
    -------
    float
        The relative luminance as a value from 0 to 1
    """
    r, g, b = (
        x / 12.92 if x <= 0.04045 else ((x + 0.055) / 1.055) ** 2.4
        for x in rgba[:3]
    )
    return 0.2126 * r + 0.7152 * g + 0.0722 * b


def style_dataframe(df):
    """
    Style a pandas DataFrame with a red-yellow-green color gradient.

    Every column except the first is colored cell by cell, mapping the
    value range 0..100 onto the gradient. Missing values are rendered
    white-on-white. The input frame is copied, not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to style.

    Returns
    -------
    pandas.io.formats.style.Styler
        Styler wrapping a copy of ``df`` with the gradient applied.
    """
    styled_df = df.copy()
    numeric_columns = styled_df.columns[1:]

    # Build the colormap once per call rather than once per cell.
    cmap = LinearSegmentedColormap.from_list('ryg', ["red", "yellow", "green"], N=256)
    min_val = 0
    max_val = 100

    def color_scale(val):
        if pd.isna(val):
            return 'background-color: white; color: white;'
        normalized = (val - min_val) / (max_val - min_val)
        rgba = cmap(normalized)
        # Pick the text color that contrasts with the cell background.
        text_color = 'white' if relative_luminance(rgba) < 0.408 else 'black'
        return f'background-color: rgba({rgba[0]*255},{rgba[1]*255},{rgba[2]*255},{rgba[3]}); color: {text_color}'

    styled_df = styled_df.style.map(color_scale, subset=numeric_columns)
    return styled_df