|
import asyncio |
|
import json |
|
|
|
import numpy as np |
|
import pandas as pd |
|
from tqdm.asyncio import tqdm_asyncio |
|
|
|
from languages import languages |
|
from models import models |
|
from tasks import tasks |
|
|
|
|
|
|
|
n_sentences = 10 |
|
n_languages = 15 |
|
n_models = 20 |
|
|
|
|
|
|
|
|
|
async def evaluate(): |
|
print("running evaluations") |
|
results = [ |
|
task(model, lang.bcp_47, i) |
|
for task in tasks |
|
for i in range(n_sentences) |
|
for lang in languages.iloc[:n_languages].itertuples() |
|
for model in models["id"].iloc[:n_models] |
|
] |
|
return await tqdm_asyncio.gather(*results, miniters=1) |
|
|
|
def serialize(df): |
|
return df.replace({np.nan: None, pd.NA: None}).to_dict(orient="records") |
|
|
|
async def main(): |
|
models["creation_date"] = models["creation_date"].apply(lambda x: x.isoformat()) |
|
results = await evaluate() |
|
results = [r for group in results for r in group] |
|
results = { |
|
"languages": serialize(languages), |
|
"models": serialize(models), |
|
"scores": results, |
|
} |
|
with open("results.json", "w") as f: |
|
json.dump(results, f, indent=2, ensure_ascii=False) |
|
|
|
|
|
if __name__ == "__main__": |
|
asyncio.run(main()) |
|
|