"""Streamlit page that renders the BABILong leaderboard tables."""

import streamlit as st

from draw_utils import LENGTHS, PAGE_INFO, PAGE_MARKDOWN
from draw_utils import load_results, style_dataframe

# Page-level configuration and global markup are applied when the script is
# loaded, before any leaderboard content is drawn.
st.set_page_config(layout="wide", page_title="Leaderboard App")
st.markdown(PAGE_MARKDOWN, unsafe_allow_html=True)


def draw_leaderboard():
    """Draw the leaderboard: header, search box and one results tab per task.

    Results come from ``load_results()``. For each task tab the rows whose
    ``task`` column equals the tab's task are shown, restricted to the
    ``model_name``, ``<=32k``, ``<=128k`` and ``LENGTHS`` columns. When the
    search box is non-empty, only models whose name contains the search text
    (case-insensitive, literal substring) are kept.
    """
    df = load_results()

    tasks = ['avg'] + [f"qa{i}" for i in range(1, 11)]
    columns = ["model_name", "<=32k", "<=128k"] + LENGTHS

    st.title("🔎📚🪡📚❓ BABILong Leaderboard 🏆")
    st.markdown(PAGE_INFO)
    st.subheader("Evaluation results:")
    st.text('Each tab corresponds to a task, avg - averaged scores over qa1-5 tasks.')
    st.markdown('Predictions of all evaluated models: '
                '[BABILong evals](https://huggingface.co/datasets/RMT-team/babilong_evals)')

    search_term = st.text_input("Search models:", "")

    row_height = 35  # pixels per table row, used to size the table to its content

    tabs = st.tabs([str(task) for task in tasks])
    for task, tab in zip(tasks, tabs):
        with tab:
            task_df = df[df.task == task][columns]

            if search_term:
                # regex=False: treat the user's text as a plain substring so
                # characters such as "(" or "+" cannot raise a regex error or
                # act as wildcards. na=False: rows without a model name simply
                # do not match instead of producing an invalid NaN mask.
                matches = task_df['model_name'].str.contains(
                    search_term, case=False, regex=False, na=False
                )
                task_df = task_df[matches]

            # Rebind rather than mutate in place: task_df is derived from df
            # by indexing, and in-place edits on such frames can trigger
            # pandas' SettingWithCopy warning.
            task_df = task_df.reset_index(drop=True)

            # One extra row of height accounts for the header.
            height = (len(task_df) + 1) * row_height

            styled_df = style_dataframe(task_df).format(precision=1)
            st.dataframe(
                styled_df,
                width=1030,
                height=height,
            )


if __name__ == "__main__":
    draw_leaderboard()