File size: 1,203 Bytes
da6e1bc
 
 
 
723f963
da6e1bc
723f963
da6e1bc
 
 
 
eaf2d97
da6e1bc
 
 
 
 
 
3ed02d5
da6e1bc
 
 
 
 
 
 
3ed02d5
da6e1bc
 
 
 
 
 
 
 
3ed02d5
723f963
da6e1bc
 
d91b022
da6e1bc
2c21cf7
da6e1bc
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import asyncio
import json

from languages import languages
from models import model_fast, models
from tasks import tasks
from tqdm.asyncio import tqdm_asyncio

# ===== config =====

n_sentences = 30
langs_eval = languages.iloc[:30]
langs_eval_detailed = languages.iloc[:2]
transcription_langs_eval = languages.iloc[:10]
transcription_langs_eval_detailed = languages.iloc[:5]

# ===== run evaluation and aggregate results =====


async def evaluate():
    print("running evaluations")
    results = [
        task(model, original_language.bcp_47, i)
        for task in tasks
        for i in range(n_sentences)
        for original_language in langs_eval.itertuples()
        for model in models["id"]
        if original_language.in_benchmark
        and (
            model == model_fast
            or original_language.bcp_47 in langs_eval_detailed.bcp_47.values
        )
    ]
    return await tqdm_asyncio.gather(*results, miniters=1)



async def main():
    results = await evaluate()
    results = [r for group in results for r in group]
    with open("results.json", "w") as f:
        json.dump(results, f, indent=2, ensure_ascii=False)


if __name__ == "__main__":
    asyncio.run(main())