Alina Lozovskaya
Add citations
e3a07b7
raw
history blame
5.64 kB
import os
import sys
import time
import gradio as gr
import yaml
from pathlib import Path
from loguru import logger
from huggingface_hub import whoami
from yourbench_space.config import generate_and_save_config
from yourbench_space.utils import (
CONFIG_PATH,
UPLOAD_DIRECTORY,
SubprocessManager,
save_files,
)
# Markdown introduction for the app: a title, a one-line tagline, a bullet
# list of features and a link to the GitHub repository. It is rendered by
# gr.Markdown at the top of the page when the UI is built.
project_description = """
# YourBench 🚀
A Dynamic Benchmark Generation Framework
- Produce diverse, up-to-date questions from real-world source documents
- Seamlessly handles ingestion, summarization, and multi-hop chunking for large or specialized datasets
- Emulates real-world usage scenarios by creating fresh tasks that guard against memorized knowledge
- Out-of-the-box pipeline stages, plus an easy plugin mechanism to accommodate custom models or domain constraints
[📖 Github Page](https://github.com/huggingface/yourbench/tree/v0.2-alpha-space)
"""
# Create the upload directory (and any missing parents) at import time;
# this is a no-op when the directory already exists.
UPLOAD_DIRECTORY.mkdir(exist_ok=True, parents=True)

# Drop the handlers registered so far and log to stderr at INFO level.
logger.remove()
logger.add(sys.stderr, level="INFO")

# Command line for the benchmark run, pointed at the config file path.
command = [
    "uv",
    "run",
    "yourbench",
    f"--config={CONFIG_PATH}",
]

# Single shared manager the UI handlers use to start, stop, kill and poll
# that command.
manager = SubprocessManager(command)
# Load the text for the citation section from docs.md, which is expected to
# sit next to this file.
docs_path = Path(__file__).parent / "docs.md"

# Read the file directly instead of checking exists() first: that avoids a
# race between the check and the read. The encoding is pinned to UTF-8 so the
# result does not depend on the platform's default locale.
try:
    docs_content = docs_path.read_text(encoding="utf-8")
except FileNotFoundError:
    docs_content = "# Citation\n\nDocumentation file not found."

# Keep only what follows the last "# Citation" heading. If the heading is
# absent, split() returns the whole text, so the full document is used.
citation_content = docs_content.split("# Citation")[-1].strip()
def generate_and_return(hf_org, hf_prefix):
    """Generate the config, then report whether the file is present on disk.

    Returns a pair: a status message and a Gradio update for the download
    component (shown with the config path on success, hidden on failure).
    """
    generate_and_save_config(hf_org, hf_prefix)

    # Poll for the config file: at most five checks, pausing half a second
    # after each miss.
    remaining_checks = 5
    while remaining_checks > 0 and not CONFIG_PATH.exists():
        time.sleep(0.5)
        remaining_checks -= 1

    # One final check decides the outcome.
    if not CONFIG_PATH.exists():
        return "❌ Config generation failed.", gr.update(visible=False, interactive=False)

    return "✅ Config saved!", gr.update(value=str(CONFIG_PATH), visible=True, interactive=True)
def prepare_task(oauth_token: gr.OAuthToken | None, model_token: str):
    """Start the managed subprocess with credentials added to its environment.

    Args:
        oauth_token: OAuth token of the logged-in user, or None when nobody
            is logged in. When present, its ``token`` is exported as
            ``HF_TOKEN``.
        model_token: Value exported as ``MODEL_API_KEY``. Skipped when None.
    """
    # Work on a copy so this process's own environment is left untouched.
    new_env = os.environ.copy()
    if oauth_token:
        new_env["HF_TOKEN"] = oauth_token.token
    # An input component with no value delivers None, which is not a valid
    # environment value (presumably custom_env is handed to the child process
    # as its environment — confirm in SubprocessManager). Keep whatever was
    # inherited instead of storing a non-string.
    if model_token is not None:
        new_env["MODEL_API_KEY"] = model_token
    manager.start_process(custom_env=new_env)
def update_hf_org_dropdown(oauth_token: gr.OAuthToken | None):
    """Build the "Organization" dropdown for the logged-in Hugging Face user.

    The choices are the user's own name followed by the names of their
    organizations, with the user name preselected. Without a token, or when
    the lookup raises, an empty dropdown is returned and a message is printed.
    """
    if oauth_token is None:
        print("Please deploy this on Spaces and log in to view the list of available organizations")
        return gr.Dropdown([], label="Organization")

    try:
        profile = whoami(oauth_token.token)
        member_orgs = [org["name"] for org in profile.get("orgs", [])]
        user_name = profile.get("name", "Unknown User")
        # The personal namespace goes first so it is the default selection.
        choices = [user_name, *member_orgs]
        return gr.Dropdown(choices, value=user_name, label="Organization")
    except Exception as e:
        print(f"Error retrieving user info: {e}")
        return gr.Dropdown([], label="Organization")
def enable_button(files):
    """Return an update that makes the target interactive only when files is truthy."""
    has_files = bool(files)
    return gr.update(interactive=has_files)
# Build the Gradio UI: a header, a login button, a "Setup" tab, a
# "Run Generation" tab and a collapsed citation section.
with gr.Blocks() as app:
    # Page header.
    gr.Markdown(project_description)
    gr.Markdown("## YourBench Setup")
    with gr.Row():
        login_btn = gr.LoginButton()
    with gr.Tab("Setup"):
        with gr.Row():
            with gr.Accordion("Hugging Face Settings"):
                hf_org_dropdown = gr.Dropdown(choices=[], label="Organization", allow_custom_value=True)
                # Fill the dropdown when the page loads. No inputs are wired, so the
                # handler's OAuthToken argument is presumably supplied by Gradio from
                # the login session — confirm against the Gradio OAuth docs.
                app.load(update_hf_org_dropdown, inputs=None, outputs=hf_org_dropdown)
                hf_dataset_prefix = gr.Textbox(label="Dataset Prefix", value="yourbench", info="Prefix applied to all datasets")
            with gr.Accordion("Upload documents"):
                file_input = gr.File(label="Upload text files", file_count="multiple", file_types=[".txt", ".md", ".html"])
                output = gr.Textbox(label="Log")
                # On upload, hand the uploaded files' names to save_files and show
                # whatever it returns in the "Log" textbox.
                file_input.upload(lambda files: save_files([file.name for file in files]), file_input, output)
        # The generate button starts disabled; enable_button toggles it whenever
        # the file input changes.
        preview_button = gr.Button("Generate New Config", interactive=False)
        log_message = gr.Textbox(label="Log Message", visible=True)
        # Hidden until generate_and_return reports success.
        download_button = gr.File(label="Download Config", visible=False, interactive=False)
        file_input.change(enable_button, inputs=file_input, outputs=preview_button)
        preview_button.click(
            generate_and_return,
            inputs=[hf_org_dropdown, hf_dataset_prefix],
            outputs=[log_message, download_button],
        )
    with gr.Tab("Run Generation"):
        log_output = gr.Code(label="Log Output", language=None, lines=20, interactive=False)
        # Refresh the log view from the manager on every timer tick (0.05 s interval).
        log_timer = gr.Timer(0.05, active=True)
        log_timer.tick(manager.read_and_get_output, outputs=log_output)
        with gr.Row():
            process_status = gr.Checkbox(label="Process Status", interactive=False)
            # Same polling interval for the running/not-running indicator.
            status_timer = gr.Timer(0.05, active=True)
            status_timer.tick(manager.is_running, outputs=process_status)
        with gr.Row():
            start_button = gr.Button("Start Task")
            # NOTE(review): the only explicit input is hf_org_dropdown, so the selected
            # organization is what arrives as prepare_task's `model_token` (exported as
            # MODEL_API_KEY) — confirm this wiring is intended.
            start_button.click(prepare_task, inputs=[hf_org_dropdown])
            stop_button = gr.Button("Stop Task")
            stop_button.click(manager.stop_process)
            kill_button = gr.Button("Kill Task")
            kill_button.click(manager.kill_process)
    # Citation section at the end, collapsed by default.
    with gr.Accordion("📜 Citation", open=False):
        gr.Markdown(citation_content)
# Start the app; "/app" is passed as an allowed path for file serving.
app.launch(allowed_paths=["/app"])