import asyncio import json from languages import languages from models import model_fast, models from tasks import tasks from tqdm.asyncio import tqdm_asyncio # ===== config ===== n_sentences = 30 langs_eval = languages.iloc[:30] langs_eval_detailed = languages.iloc[:2] transcription_langs_eval = languages.iloc[:10] transcription_langs_eval_detailed = languages.iloc[:5] # ===== run evaluation and aggregate results ===== async def evaluate(): print("running evaluations") results = [ task(model, original_language.bcp_47, i) for task in tasks for i in range(n_sentences) for original_language in langs_eval.itertuples() for model in models["id"] if original_language.in_benchmark and ( model == model_fast or original_language.bcp_47 in langs_eval_detailed.bcp_47.values ) ] return await tqdm_asyncio.gather(*results, miniters=1) async def main(): results = await evaluate() results = [r for group in results for r in group] with open("results.json", "w") as f: json.dump(results, f, indent=2, ensure_ascii=False) if __name__ == "__main__": asyncio.run(main())