diff --git "a/results.json" "b/results.json" --- "a/results.json" +++ "b/results.json" @@ -1,48002 +1,56240 @@ -[ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.5679608237702286, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.746881923400435, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.4438455475739657, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.6320800718582147, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.5894973558751632, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.7562097956860054, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.3846086976522069, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.5835344719191324, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.4804215535486392, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.6694735319785804, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.2511517944602615, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-nemo", - 
"bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.4484633445384819, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.5820808184424484, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.73788733854976, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.5749603738163459, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.7240488251574404, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.5617561349997696, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.7132694856647042, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.2963216580569375, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.5101500486835966, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.15317719477157257, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.38800976493585004, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.6001453932849357, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", 
- "score": 0.762029391170019, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.30676942927198475, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.4968492831219663, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.32063971770635635, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5206258401513325, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.39086127104761287, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.6239956806265569, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.3020679767949182, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5246291817407542, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.29261990846502584, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5207965578474395, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.23343658187420896, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": 
"chrf", - "score": 0.5188968707275573, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.2920008662633279, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.47119207959541226, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.2596939072050362, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.4394574387008692, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.4273817965049865, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.6016204186733703, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.2777551012631926, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.49423240120783246, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - 
"bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.7964573357809173, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.8458636471716781, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.34633672321253084, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.5378805625051344, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.3582301850807646, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.5380305837807603, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.300740577257699, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.5272774705181614, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.3099603853356145, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.5209233176748354, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.35580399268816465, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.5392592206305507, - "sentence_nr": 0 - }, - { - 
"model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.39317381456022266, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.6026058740561834, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.48930936408255293, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.699085629239476, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.3963410285961713, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.613166190285915, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.44294247711132617, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.5915660675216782, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.3756985486608933, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.5991443770283833, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.5009456904181451, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 
0.6893719644090858, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.18273944860385094, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.44261865187418153, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.2153742037697241, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.4581737688885401, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.3372953649368346, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.5482505380106469, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.28528905353056333, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.4885812318466243, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.2935204022158406, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.4867597973247361, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.2929684584911775, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": 
"translation", - "metric": "chrf", - "score": 0.5038324436049059, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.4034224234291925, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.5736798834726872, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.1077205146963877, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.428338145564396, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.22327767951697297, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.4063556880747369, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.2572733200413211, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.4520014138562526, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.40311197004738203, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.5788525108956781, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.45313578977486535, - "sentence_nr": 0 - }, - { - "model": 
"meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.6160993561903745, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.2651736858432996, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.4491383344282561, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.34545319957597864, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.5727052860304503, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.15815751066481462, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.5152611872266766, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.12903696060775005, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.456225988032654, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.024459391267874976, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.12351824822447692, - "sentence_nr": 1 - }, - { - 
"model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.46822754470803873, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.3465147345201782, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.08516700886866406, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.4091252890943268, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.19194937906573872, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.5477665664300843, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.4370196290761142, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.20669086265781264, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.5076721272198604, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.17630490037560695, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - 
"task": "translation", - "metric": "chrf", - "score": 0.48116430160978857, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.4122750002638689, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.15412719160788987, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.5010353699512481, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.12369892692249995, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.44549610902403686, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.12560672881768975, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.4969560260291519, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.17077058518804336, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5022008374701596, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.10784756064735967, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", 
- "metric": "chrf", - "score": 0.4427230465401631, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.06735571462439276, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.38102852892512806, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.42723260976616784, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.1694466724647263, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.4902502031746037, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.3532931581623198, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.175396614619324, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.49736499605529066, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.15154395847232716, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.46053919348995803, - "sentence_nr": 1 
- }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.4041678259311437, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.1290514243115152, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.4766581477336301, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.08273178236238297, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.36399666460809255, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.12601482779921785, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.43595665254608706, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.40959087443621306, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.6348509381122925, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.08214106568089705, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.3969463877642616, - 
"sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.0744904632040495, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.4111163205685468, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.12894104034845807, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.4486368934849452, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.10070927557742705, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.43718220262892105, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.0772718393063023, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.4203683137304257, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.0756907193511249, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.4138725093679467, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.21748353646757182, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - 
"metric": "chrf", - "score": 0.4462746462826943, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.4179644538349004, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.10505106462290037, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.4474870048911137, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.0009218289085545725, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.15653859793617866, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.43177798053127925, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.0891537192318598, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.3970634926176537, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.0950136506275681, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": 
"translation", - "metric": "chrf", - "score": 0.4372017487229785, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.1259356760989446, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.44568274520971096, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.16322494183480127, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.4815584993817062, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.0904087252785689, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.41830513174690515, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.21351902664706998, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.5130443042033361, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.16269986423611488, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - 
"bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.06939838145153245, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.3371547585108182, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.1691386174483793, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.4920789340026317, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.14944432524273302, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.4972796478830659, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.09793316925795417, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.4297577431879659, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.38870674200492367, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.6484380084879691, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.4923751299732868, - "sentence_nr": 2 - }, - { - "model": 
"meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.6853756490381199, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.3996712647649035, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.6353525755760105, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.5115346945020283, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.7037574715738644, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.017834618169115152, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.05927156798818119, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.23904922011090457, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.3399292774084129, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.6152980280400979, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.8311281590297233, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.005449161724399305, - 
"sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.026158029267484995, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.24508104771894088, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.5725552336126134, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.20801258614305904, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.26703508536995574, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.35315040956049437, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.625895188503691, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.11133996756497437, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.4410280353998367, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.17743299460161885, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": 
"translation", - "metric": "chrf", - "score": 0.43071271897416463, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.16052654068024738, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.41580120868053494, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.05963579607071745, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.31139762378406344, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.006734847287559362, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.03408121951468736, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.09880177230676102, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.3297638349619511, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.2377604053257556, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5662768009060447, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.22573408807826306, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - 
"bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5444672928195973, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.10742716472890976, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.42694859148910824, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.14745870033404418, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.475170637938921, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.21665407194210906, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.4344921442639243, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.37994652561206577, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.6464467277069994, - 
"sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.09362261118571368, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.3452056942265759, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.18917620656425485, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.4346170232980484, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.420450507904553, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.6503146347305717, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.24894072982768842, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.5212235893093335, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.393613605227227, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.6492198447661237, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.21147734744561483, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - 
"metric": "chrf", - "score": 0.41020178654369294, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.2329856851831642, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.5405751250637106, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.41756686236967944, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.5616829345739638, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.38189567401226293, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.6154314825900052, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.2126707920684064, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.4659908460634765, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.23240102389974368, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.4973274282641141, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.17979384730979156, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", 
- "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.4177311931467539, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.1702602472176709, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.4366640707779677, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.933651069586263, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.9586507529693243, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.3816408219023713, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.5784105768028126, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.18398226639192106, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.37285010531146734, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.26958884543190903, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.5631664732610485, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.4005296397635166, - "sentence_nr": 2 - }, - { - 
"model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.6201785376974677, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.15956483578595942, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.425693420655628, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.2323385180696658, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.5019509292309764, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.22952177306405494, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.5279520952576137, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.3618488169166299, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.5708179622131996, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.1712766252338756, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.5225554962608486, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 
0.2709079038456153, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.447458019441992, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.38249626297768063, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.40976234193505356, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.5806197937310393, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.7346706700987636, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.5793367580502561, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.6502428441722727, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.4855332614117322, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.5299556742893647, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.19940445989088915, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.43164821827950184, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": 
"bleu", - "score": 0.2423441824135159, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.4429509373913047, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.6242817472465665, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.7056438934239434, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.6064630666233242, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.6752055521830945, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.5357110024227318, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.6365941772753647, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.14790264259417688, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.27159767590045303, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.4751132438608344, - "sentence_nr": 3 - }, - { - "model": 
"amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.6849386986272349, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.08635800047213174, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.218109371254876, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.23386786214190372, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.3682311523733465, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.11739521786077453, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.22090491782919655, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.1892240568795935, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.280413108453108, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.11547518641061649, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.25945846414490087, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.20233074088759792, - "sentence_nr": 3 - }, - { - 
"model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.3746629492952356, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.40214612768560637, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.45128424593135114, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.37284875432797243, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.44888401040760956, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0925329498915617, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.2110486160692096, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.12453389344594705, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.141543757252386, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - 
"score": 0.2594145364221844, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.6244631487487835, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.6931369519059803, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.25383339228798274, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.45896379476820603, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.17200767571780612, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.3723150838362789, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.1477219991186121, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.28685201698226354, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.3254455687469726, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.4474512036484817, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.23887527917609022, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - 
"task": "translation", - "metric": "chrf", - "score": 0.4120359948636439, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.580451128369423, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.728208634600343, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.3556521383601747, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.594830811413066, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.21629114799587432, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.3542320138389837, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.580451128369423, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.728208634600343, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.27405612859390877, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": 
"de", - "task": "translation", - "metric": "chrf", - "score": 0.4639958592456083, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.13004800471424346, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.28217142159025543, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.37821486365532614, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.4718665834023439, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.3699382260470039, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.4032851361478274, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.45167594566243024, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.5169677927619225, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.23386786214190372, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.3780009826926042, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.3925121365052661, - "sentence_nr": 3 - }, - { - "model": 
"meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.47788592802001717, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.1423412184218882, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.2596718628394258, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.3572188192648703, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.45381175288762937, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.07425055521504613, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.18122341046764998, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.1978585723043446, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.3527599187160617, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.2523019529343173, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.4406369072888057, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 
0.41072675483179805, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.5635589150380774, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.3883375900135818, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.4643731845106876, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.6242817472465665, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.7123666275414222, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.8003203203844999, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.9453478043428296, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": 
"meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.2246029757863831, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.5773502691896258, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.7999099314029202, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.6417603075499863, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.7825422900366437, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.8503171627677965, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.37709297891717664, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.6881502501430368, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - 
"score": 0.0, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.8003203203844999, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.9453478043428296, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.629039349740581, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.629039349740581, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.629039349740581, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5581982021478125, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - 
"bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.629039349740581, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.652013511062815, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.629039349740581, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.629039349740581, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.629039349740581, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.629039349740581, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": 
"microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.629039349740581, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.629039349740581, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.5881561248602009, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.40435987083533204, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.39858613265631837, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.4425973012069069, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": 
"translation", - "metric": "chrf", - "score": 0.47160616105623426, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.5309982646782259, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.1892240568795935, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.6151179643430991, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.41238100267720657, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.39909989628767284, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.17181529671327242, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.5293474685884572, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", 
- "metric": "chrf", - "score": 0.4429196299668147, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.3830425592586042, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.1667955161379731, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.5802683403568892, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.3237722713145643, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.7426638026175545, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.49342175914364256, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.4352628824108997, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": 
"chrf", - "score": 0.5116862201536014, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.33471616336068044, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.2865612242047131, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.6433813179203622, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.3598792258309727, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.5125809225356253, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.5539920925426138, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 
0.5226572946586268, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.5073395824633415, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.29382595610734974, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.1667955161379731, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.5773664661124461, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.7013062757071812, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.9303769449292738, - "sentence_nr": 5 - }, - { - "model": 
"meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.2381658499765768, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.8492326635760689, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.9063898435384111, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.5309354663044072, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.6756014232714684, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.4529852871970908, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.6941474239078328, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.8522456714074852, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.9096914044088521, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": 
"translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.9457416090031758, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.9892952933418456, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.47410002229034043, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.7538467008030766, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.5087473540251254, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.7647955332172516, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.5087473540251254, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.7647955332172516, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.47410002229034043, - "sentence_nr": 5 - }, - 
{ - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.7538467008030766, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.4234885228074744, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.7410180114887145, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.47375069012411286, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.7543919667018285, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.5738396574789242, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.798357133373606, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.5738396574789242, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.798357133373606, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.47410002229034043, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.7538467008030766, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.47410002229034043, - "sentence_nr": 5 - }, - { - "model": 
"qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.7538467008030766, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.4234885228074744, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.7410180114887145, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.47375069012411286, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.7543919667018285, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.5091224918749461, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.7829685247145245, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.40276720463657734, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.6529271690805427, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.30188353873287377, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.6086565367747951, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.6026286934891149, - "sentence_nr": 5 - }, - { - "model": 
"meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.8025775976044891, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.4596980088392874, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.713787745993602, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.3272712268138726, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.6272846474183881, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.2981792160679168, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.5788026000794341, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.7012294787544179, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.8478115719875968, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.21690365808279138, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.5384773678665918, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 
0.25711386542134795, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.6088853751738869, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.5695988432761473, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.7516103467926585, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.5072784644062104, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.7361065921505279, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.38091370416670794, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.6438225861756911, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.5091224918749461, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.7202697992734389, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.6917901740466924, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.8479928839177578, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": 
"translation", - "metric": "bleu", - "score": 0.5309354663044072, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.6990707992725005, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.1673872929477023, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.4506667273103674, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.6917901740466924, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.8479928839177578, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.3416581331218724, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.6578570934289981, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.4797543511401896, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.7240781310560407, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.6401876410870359, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.7526484951226097, - "sentence_nr": 5 - }, - { - "model": 
"meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.33057129676705455, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.5669225664686625, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.6004981752197522, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.7697646564917222, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.8492326635760689, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.9027320255916917, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.30350690419450826, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.569133886912883, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.6834516951654327, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 6 - }, - { - "model": 
"meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.3263040636562357, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.7511573912724299, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.9453473543978153, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.5971070986250356, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.8874294965619517, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.7511573912724299, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.9453473543978153, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - 
"task": "translation", - "metric": "bleu", - "score": 0.7511573912724299, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.9453473543978153, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.7511573912724299, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.9453473543978153, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.7511573912724299, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.9453473543978153, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.7511573912724299, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.9453473543978153, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.19464521962073492, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5838790966762375, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.3142665434344143, - "sentence_nr": 6 - }, - { - 
"model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.6466526067220029, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.3751840463233443, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.6279894552667558, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.19268479640608693, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.551397074868541, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.17470942957770763, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5403400891349619, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.19464521962073492, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5763410052067085, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.37392149096896676, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.6665214662145853, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.19464521962073492, - 
"sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5838790966762375, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.19464521962073492, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5763410052067085, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5460240376042262, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.24343304284910333, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.6275577931282961, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.28571962561926445, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.6431872581462166, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.6255340042200862, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.8724783049357475, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.48994561421713123, - "sentence_nr": 6 - }, - { - "model": 
"meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.7411155087367244, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.5971070986250356, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.8874294965619517, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.4547900039222725, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.6541971428810075, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.23198210427894825, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.630711601223299, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.4831233610237384, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.7122562458056777, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.32263864160302524, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.6824395076981005, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 
0.38305978177479755, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.6061131723054572, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.24047860794644352, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.58198979036704, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.7511573912724299, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.9453473543978153, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.42984824697674956, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.7289444696770301, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.4881010344921759, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.7317734491561229, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.5971070986250356, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.8874294965619517, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": 
"translation", - "metric": "bleu", - "score": 0.3684981984538114, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.5606332518476288, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.4536404448264584, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.8020827133708689, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.4545091839935173, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.7166050399790445, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.3370129264673147, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.7096874943799061, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.1624355752882384, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.4952968469712617, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.4831233610237384, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.7807505267551733, - "sentence_nr": 6 - }, - { - "model": 
"meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.5595205105615875, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.8322210048001876, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.3142665434344143, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.6466526067220029, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.3610544299180199, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.49125115898082056, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.4284945090100314, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.7164026439677106, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.8578928092681435, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.9422733087334002, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.42818224355402373, - 
"sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.42105372680687736, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.7001171094008295, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.1582866049832572, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.34487142413575794, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.15521606028436608, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.37645329404497957, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.12620429887108936, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.35580703793872603, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.12872220631084524, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.33602633953270183, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.042121062429802174, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", 
- "score": 0.14281404499176092, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.042575418285137674, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.05173688961049459, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.3045613775157565, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.5275070803493389, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.2734283774929853, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.5252214120598302, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.10203846572325131, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.33381153680096753, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.014935758919429663, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.08106107745254391, - "sentence_nr": 7 - }, - { - "model": 
"amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.044304867337633724, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.20806974344498103, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.08860973467526746, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.3178004360288637, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.15268019045355535, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.41028757620299977, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.030860166165309233, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.1100250143829584, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.21255327712152144, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.43272151570555034, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.01486609147288197, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.13893773605583024, - "sentence_nr": 7 - }, - { - 
"model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.06609667473412645, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.26197209338359717, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.26064517697298795, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5092206110218525, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.1507980395794452, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.4306039128585424, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.1438459189500836, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.30693371625402605, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0979038733644086, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.30211704738953993, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.009624974244068071, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.07318255686027669, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": 
"zh", - "task": "translation", - "metric": "bleu", - "score": 0.043420474648595074, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.2884095690753619, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.13868172938464635, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.3094469764260441, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.11091252683001185, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.26607634610445896, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.18154954789336694, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.4557483776072868, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.1381751568911733, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.3121557499162649, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.20065115069964384, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.4084885616013531, - "sentence_nr": 7 - }, - { - "model": 
"meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.12291219097556666, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.3448002180666873, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.20608572305725564, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.4704943905570542, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.085416483900781, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.2825804066750608, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.11452508920842025, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.3212742401272785, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.15478222669012726, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.3550584759508654, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.07875433150726119, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 
0.2638954513805452, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.10734088848154077, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.33946796348247366, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.17795920517030017, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.41862955401967455, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.19388048412249795, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.44361702376789247, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.1237012344369667, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.33331866832253354, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.15589802574348086, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.37894206802233305, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.1948502778967486, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - 
"task": "translation", - "metric": "chrf", - "score": 0.35525815981538433, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.1618333627385132, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.3458746996740858, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.17393111207515277, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.39042812195808824, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.19064689695123957, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.36954921822756504, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.1785851272602057, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.3800733399524004, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.20113943179758872, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.5054929215592371, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.07088281524771703, - "sentence_nr": 7 - }, - { - "model": 
"meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.1725752257112697, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.11901413329120636, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.2908877283991857, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.15593857496482408, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.3832822126692406, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.21107720643690867, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.43911506176829573, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.13410301071131794, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.3942932268034351, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.30677064886592076, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.5308555945242818, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.1327526847508867, - "sentence_nr": 8 
- }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.37850602486495205, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.18405035438430847, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.4142901090120915, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.061826017721563604, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.08852681798207009, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.3583179111355935, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.3857436691295343, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.5750224388123065, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.5920893212447781, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.6925021521158101, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.22478613858269392, - "sentence_nr": 8 - }, 
- { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.44348101018104913, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.183687049781416, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.351911486970854, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.5181825846579515, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.17328174803055044, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.3178268797869574, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.286608441075188, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.4579283646292802, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.25861130592298187, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": 
"chrf", - "score": 0.39452644092432093, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.20379250618355427, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.41085414309816914, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.310679343206099, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.4471183729584148, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.2334787866969297, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.3621517589760531, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5866873582151947, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.46269559069048716, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.46872641361415845, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.10434360980785336, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - 
"score": 0.3012789660952507, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.13835317113453516, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.16343842313572918, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.3986641525285075, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.30890092021323623, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.5553909583113487, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.1690979933029136, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.3751861276375209, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.2840563956846642, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.5110250591004448, - "sentence_nr": 8 - }, - { - "model": 
"meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.19920413481788912, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.42537796926163113, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.20401796878756984, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.43317630453631556, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.2044887070217883, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.38471585132587544, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.2980504190448601, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.5101268920225042, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.9436043261706615, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.9880191679951993, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.18831933500600306, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 
0.4318025704181776, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.21544027588567594, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.5040038440508637, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.26970223719007375, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.5172978597562362, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.30630098078522544, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.5439056051092116, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.19850842371858787, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.43584341835040474, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.20170335119323748, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.3541251997977811, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.10508106635796587, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - 
"task": "translation", - "metric": "chrf", - "score": 0.3182774828667731, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.2309552734743087, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.43975656978777905, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.23530033724858213, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.46208607300298377, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.37284027455688556, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.5528347504734102, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.2887308472548599, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.38846174119508314, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.15487293534817623, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.39293494862736383, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.21741853044139284, - "sentence_nr": 8 - }, - { - "model": 
"meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.3535910166292039, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.33626819961829335, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.5466581859383387, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.32000331642122953, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.5480591855923784, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.21132630077912357, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.4175670766052166, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.13108369255325433, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.3929302741911199, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.20174045447955946, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.33729298835089516, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.20972571494011877, - 
"sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.395894071208527, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.35369375385786006, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.13087682931309413, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.19462952976787054, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.013538497707846785, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.1570208067577934, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.4113045280468524, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.15082713742973322, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.3965911699770542, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.15471428129658016, - "sentence_nr": 
9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.4580211317461481, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.18928475425929295, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.4916060435820526, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.21940429389247643, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.4343280866601455, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.1824401863423467, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.36709433185688595, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.3377854698776805, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.521201229892482, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.12475846123062707, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": 
"chrf", - "score": 0.27823340731817514, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.10759927692349745, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.21065794536310511, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.07843772989359644, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.1324578891826276, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.08163977068875294, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.09047502044256338, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.21669141850731985, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.10322985794794913, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.24491122482530842, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.11809057094812304, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": 
"translation", - "metric": "chrf", - "score": 0.27930342777387007, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.21268444697113978, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.3229997133764549, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.1475503033983142, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.22104108935973044, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.16434349396840395, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.28582614857210975, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.10085167559661873, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.23831215045289575, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.17543744527808774, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.28201016956553354, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.12274092982883021, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - 
"score": 0.3385513651938691, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.31017716089889963, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.21688283061839067, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.41775824162589076, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.12162779391619735, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.3228288840559658, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.18237599479708327, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.3740403511567824, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.12876689524369925, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.3253153379449275, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.119159749312327, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": 
"translation", - "metric": "chrf", - "score": 0.21297942664093145, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.2036348471340078, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.3472831655579266, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.24362353508932386, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.28135849152758385, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.14482189302397735, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.2913876815877049, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.16306957103469613, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", 
- "metric": "chrf", - "score": 0.9199349282509897, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.28112283847231073, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.1308613527030366, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.3063146286877558, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.12787395553510186, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.21931515993565381, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.1441966459257424, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.14957316612525498, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.27675048474641756, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", 
- "metric": "chrf", - "score": 0.3780460244391623, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.12503614625842938, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.20624064341134082, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.3368893372278425, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.2961559727627133, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.12846497020051437, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.2670865602673704, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.28252374116432993, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.3549531183419122, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": 
"translation", - "metric": "chrf", - "score": 0.26128489301072644, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.2126837065505244, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 10 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.07149097424598219, - "sentence_nr": 10 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 10 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 - }, - { - "model": 
"mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.7774075575820374, - "sentence_nr": 10 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.8943538262827356, - "sentence_nr": 10 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 10 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.18639667871924825, - "sentence_nr": 10 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.4540232715517938, - "sentence_nr": 10 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.8431643718744966, - "sentence_nr": 10 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.9341410275694613, - "sentence_nr": 10 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 10 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 10 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 10 - }, - { - 
"model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.47095916883357913, - "sentence_nr": 10 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.701526330557871, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.38260294162784475, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.6692418584049541, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.4093629115744712, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.6243156092220487, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.36703839483583006, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.6725357332891145, - "sentence_nr": 10 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.4322450379367835, - "sentence_nr": 10 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.697398762810304, - "sentence_nr": 10 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.41122010762096617, - "sentence_nr": 10 - }, - { - 
"model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.6697492221087861, - "sentence_nr": 10 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.41126318495820946, - "sentence_nr": 10 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.7254294465493162, - "sentence_nr": 10 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.4682601513034942, - "sentence_nr": 10 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.691130012325589, - "sentence_nr": 10 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.35334199245807973, - "sentence_nr": 10 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.6858610070406853, - "sentence_nr": 10 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 10 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 10 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.33061666631099795, - "sentence_nr": 10 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5343307680770133, - "sentence_nr": 10 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.38981415389445495, - "sentence_nr": 10 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", 
- "metric": "chrf", - "score": 0.665622189515994, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.3223937524276847, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.6719135382778884, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.4466645979681496, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.714247354760266, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.6233091888805312, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.7757111039890131, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.33414322499224436, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.7159580680193959, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.6620694102966999, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.7893416551805176, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.26540383860058264, - "sentence_nr": 10 - }, - { - "model": 
"meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.51610805930355, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.49335830881778164, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.7240615166053675, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.4024279293206815, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.6798070651801875, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.6153147385756811, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.8160952378322835, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.6838493012537611, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.8178509424142287, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.5169198985488462, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.7879691803533485, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 
0.5223010192696725, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.7442134884509299, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.3885151883045163, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.6763151870864087, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.5985488590218004, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.8248561222494313, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.37163791993879014, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.6792432753943116, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.5152630372775983, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.7696821316655393, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.43521980294891405, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.7204319998551938, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - 
"task": "translation", - "metric": "bleu", - "score": 0.46417187236805535, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.6653227698984816, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.519124054532681, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.7733428788002137, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.5083170211670072, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.755952798269267, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.3161432307247198, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.5990810117425377, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.40980949787910764, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.7145653936496129, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.5770135999436572, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.7697316849447288, - "sentence_nr": 10 - }, - { - "model": 
"meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.7030214416074754, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.8357829168322639, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.49199339399396913, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.713934780293142, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.5002824356846001, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.7029341279811726, - "sentence_nr": 10 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.029124970213905314, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.1779610499753793, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.05989397907532586, - "sentence_nr": 11 - }, - { - "model": 
"meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.13539167567510446, - "sentence_nr": 11 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.03073685498855941, - "sentence_nr": 11 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.08933758530290428, - "sentence_nr": 11 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.21051269871304829, - "sentence_nr": 11 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.18854722085547196, - "sentence_nr": 11 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.1387123733773652, - "sentence_nr": 11 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - 
"score": 0, - "sentence_nr": 11 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.05499461839884487, - "sentence_nr": 11 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.19978068293555115, - "sentence_nr": 11 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.1388011701223677, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.1460389336009171, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.038796252164058714, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": 
"translation", - "metric": "chrf", - "score": 0.1756002877791377, - "sentence_nr": 11 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.0029868578255675027, - "sentence_nr": 11 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.015380253532528225, - "sentence_nr": 11 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.19065171436703615, - "sentence_nr": 11 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.21083781655774478, - "sentence_nr": 11 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.14590438247348272, - "sentence_nr": 11 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 11 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": 
"microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.04379419293412465, - "sentence_nr": 11 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.15119622228734425, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.21315318926996712, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.16991425356152365, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.22371589981083434, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.20982178138488494, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": 
"fr", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.20189358781069322, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.20261685251676126, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.226729844497646, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.18184342512086546, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.2185121523322681, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.17386106914161167, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - 
"metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.08272059515141832, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.1814025725787457, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.23945930551153607, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.20815933215961574, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.09886053260067004, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.14345644530149382, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 
0.0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.19097844728039898, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.08246021416977749, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.18868639139421345, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.20665565461558383, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.17764901410543646, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.19312651305380893, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, 
- { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.21371557282714232, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.18854043679878274, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.19559831357902827, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.1914895496057553, - "sentence_nr": 11 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.6666935927206881, - "sentence_nr": 12 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.7957561291403441, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.34999116613463505, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.6356075517191035, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.48649824146709, - "sentence_nr": 12 - }, - { - "model": 
"meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.6763447333054696, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.367622917844187, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.5615050712672139, - "sentence_nr": 12 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.4081538556642202, - "sentence_nr": 12 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.46386216052527535, - "sentence_nr": 12 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.4300174433641992, - "sentence_nr": 12 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.5099800158255156, - "sentence_nr": 12 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.7963205130973803, - "sentence_nr": 12 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.8101688749569373, - "sentence_nr": 12 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.6570128212612868, - "sentence_nr": 12 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.6262090565616182, - "sentence_nr": 12 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.5866943184579982, - "sentence_nr": 12 - 
}, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.6390393619950272, - "sentence_nr": 12 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 12 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 12 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 12 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.01047222192173988, - "sentence_nr": 12 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.5683565265173782, - "sentence_nr": 12 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.7072367582469653, - "sentence_nr": 12 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 12 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.20287366424876002, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5368464080033196, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.5198707241967666, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 
0.6993305416237223, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.36603776814499195, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.45532918164901276, - "sentence_nr": 12 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 12 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.13525036115537795, - "sentence_nr": 12 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.3120848453730729, - "sentence_nr": 12 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.3474347870952493, - "sentence_nr": 12 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.7073395735740273, - "sentence_nr": 12 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.6577952971578602, - "sentence_nr": 12 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.6570128212612868, - "sentence_nr": 12 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.6221526807313811, - "sentence_nr": 12 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.5460462259563637, - "sentence_nr": 12 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 
0.6641829079106271, - "sentence_nr": 12 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 12 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.04884431803904408, - "sentence_nr": 12 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 12 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.18357384275951122, - "sentence_nr": 12 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.28073304156067924, - "sentence_nr": 12 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.360657984953223, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.46365764298816153, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.5757521453586436, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.3147715014841853, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.5986154863155839, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.3885646234110734, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.5051669760132699, - "sentence_nr": 
12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.464413403675355, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.6291656356697347, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.30490938758882236, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.579088460457721, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.3758073513458154, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.5302950018189692, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.29308025637967977, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.5715200997140051, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.43285599641891276, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.5551678521355665, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.25984882476296983, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": 
"chrf", - "score": 0.6305744214119023, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.48649824146709, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.7255446918266525, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.464413403675355, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.6853183317800515, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.34999116613463505, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.6356075517191035, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.4426623526629488, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.612058732370435, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.5522004843736675, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.6166558670381421, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.37954187220913477, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - 
"bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.5550325994532472, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.3147715014841853, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.521228891025682, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.3964513253420688, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.6095420129111676, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.36033217429111203, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.5550014071110869, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.33403925633579773, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.5753930328058733, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.44882520213790794, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.5856175239899348, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.42760828727369016, - "sentence_nr": 12 - 
}, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.6065010489098535, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.33403925633579773, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.5915394296427854, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.3212785834179169, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.6158121620368939, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.1751489536280261, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.378593296276962, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.3214110553053944, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.49232390716994445, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.479033905070678, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.5975149526416976, - "sentence_nr": 12 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 
0.13150403915662862, - "sentence_nr": 13 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.21177549089429396, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.1424915360855107, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.23985076149753726, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.13309638637723345, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.18696197122203645, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.12256515595630638, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.23303109995893123, - "sentence_nr": 13 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.1419886619859991, - "sentence_nr": 13 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.24113733359485448, - "sentence_nr": 13 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.1324448705928064, - "sentence_nr": 13 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.22863839042697148, - "sentence_nr": 13 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", 
- "metric": "bleu", - "score": 0.12017886776600228, - "sentence_nr": 13 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.20794486026487116, - "sentence_nr": 13 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.1164257728844972, - "sentence_nr": 13 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.19249901344360867, - "sentence_nr": 13 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.12325384013681445, - "sentence_nr": 13 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.1960232617116645, - "sentence_nr": 13 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.12806473847444227, - "sentence_nr": 13 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.20054688779645718, - "sentence_nr": 13 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.1345714227066951, - "sentence_nr": 13 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.21078968525268058, - "sentence_nr": 13 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.1802615495980454, - "sentence_nr": 13 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.19630112442374525, - "sentence_nr": 13 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.8212614342207556, 
- "sentence_nr": 13 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.7876222308170935, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.5888582552569348, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.7876222308170935, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.5888582552569348, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.7876222308170935, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.6871546336787117, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 13 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.6871546336787117, - "sentence_nr": 13 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 13 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.43550490048931545, - "sentence_nr": 13 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.6419345531187637, - "sentence_nr": 13 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.17539593635425982, - 
"sentence_nr": 13 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.3139104155809725, - "sentence_nr": 13 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.39225487001250453, - "sentence_nr": 13 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5189967318357492, - "sentence_nr": 13 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.12859070457371286, - "sentence_nr": 13 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.22162336097079333, - "sentence_nr": 13 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.6871546336787117, - "sentence_nr": 13 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 13 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.6258765997974801, - "sentence_nr": 13 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.6680248455809015, - "sentence_nr": 13 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.6258765997974801, - "sentence_nr": 13 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.6722124517361844, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 13 - }, - { - "model": 
"meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.17023327167529265, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.2521455524828544, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.13150403915662862, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.2229548791980166, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.15247670030930355, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.1324448705928064, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.23382021475411732, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.09766807787022613, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.16788063248730647, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.12111615182138995, - 
"sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.21505717177216926, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.09979796185764318, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.1310501345458609, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.11512937599552589, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.1852451960926282, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.126642985054506, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.20913543330915318, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.12632059501697884, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.22490978846607526, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.1352612651586241, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.22176710342008016, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": 
"translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.18982400330057914, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.11760179026027952, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.19531596229980544, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.09968269909242322, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.14510210137368384, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.1204925245474865, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.12192273449574796, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.18177358407861108, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.16841504132177978, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": 
"fa", - "task": "translation", - "metric": "bleu", - "score": 0.10667790151233097, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.17427579502643556, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.1508875367739971, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.20889434105456664, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.13184959768302618, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.30505662513933907, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.09878901581794378, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.1651800705978423, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.13150403915662862, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.20736628090200235, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.11824658049755846, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.2047497542808756, - "sentence_nr": 13 - 
}, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.1461072488843534, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.1946917085815184, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.1018151014848322, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.14524830913329922, - "sentence_nr": 13 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.2589080403198245, - "sentence_nr": 14 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.2189767496390278, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.09761931247072746, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.1397102655312677, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.1326689502117876, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 14 - }, - { - "model": 
"mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.167569694983793, - "sentence_nr": 14 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.15848968577272604, - "sentence_nr": 14 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.24447662789322752, - "sentence_nr": 14 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.20665940380705064, - "sentence_nr": 14 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.18243716955007858, - "sentence_nr": 14 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.16168125580314086, - "sentence_nr": 14 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.2450013599045987, - "sentence_nr": 14 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.20901732384345645, - "sentence_nr": 14 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.20222677481313764, - "sentence_nr": 14 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.18492694642397273, - "sentence_nr": 14 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.18243716955007863, - "sentence_nr": 14 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.16667457585564618, - "sentence_nr": 14 - }, - { - "model": 
"microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 14 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.08556679632324991, - "sentence_nr": 14 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.1575852366903021, - "sentence_nr": 14 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.1474874322154398, - "sentence_nr": 14 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 14 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.9202663016973823, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.9263876898254182, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.8621431910551439, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.8363304387269249, - "sentence_nr": 14 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": 
"translation", - "metric": "bleu", - "score": 0.9419492177147062, - "sentence_nr": 14 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.9202237383102091, - "sentence_nr": 14 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.6656058483395763, - "sentence_nr": 14 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.6306557167105028, - "sentence_nr": 14 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.8657947138469048, - "sentence_nr": 14 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.8367521498141209, - "sentence_nr": 14 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.9419492177147062, - "sentence_nr": 14 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.9202237383102091, - "sentence_nr": 14 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.6993348038140574, - "sentence_nr": 14 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.6335836519040372, - "sentence_nr": 14 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 14 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 14 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 14 
- }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 14 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.9419492177147062, - "sentence_nr": 14 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.9202237383102091, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.11064738383914807, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.12449466772796605, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.12222372495044852, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.12383047729216191, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.1392580908972882, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.1333265070823728, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.07717159074475938, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.09413026539458375, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.16807498532991816, 
- "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.16404257857373192, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.21005284223037346, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.1679703861465872, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.0951509584925814, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.12014553061064691, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.11737915185320068, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.10085050674562507, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.11377195287577829, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.1301681094143453, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.09455636771034115, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.11463120929696417, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": 
"translation", - "metric": "bleu", - "score": 0.1544787887603271, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.1384236976807813, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.11488572123868507, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.1455973492295447, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.13735441291745387, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.20255423961944058, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.205408273869532, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.11470196605012067, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.0960438892364715, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.07184436307032757, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - 
"bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.20378989148152887, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.16337212771611656, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.09669863605676213, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.10886215421099144, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.18171364159867548, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.16245793974098002, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.07562263205281951, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.09819928715831736, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.1430606569063152, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.128073928655324, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.09526781380423786, - "sentence_nr": 14 - }, - { - 
"model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.18223449608285797, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.17127401148639734, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.09855718610544388, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.09669863605676213, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.11679541132562438, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.09643517424337235, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.1226126790254367, - "sentence_nr": 14 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.3969253441303859, - "sentence_nr": 15 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.43277080710930865, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.26887073704667247, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.2918476164856665, - 
"sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.5183146371291372, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.5942793492554739, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.028864519535915668, - "sentence_nr": 15 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.13535086012687783, - "sentence_nr": 15 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.29687399422087424, - "sentence_nr": 15 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.002376388269368755, - "sentence_nr": 15 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.04574695485583133, - "sentence_nr": 15 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.2703094106380642, - "sentence_nr": 15 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.2982249908859, - "sentence_nr": 15 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.29313061087267483, - "sentence_nr": 15 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 
0.30295384730328956, - "sentence_nr": 15 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.284911205299835, - "sentence_nr": 15 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.32067889250923776, - "sentence_nr": 15 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.29353055611145706, - "sentence_nr": 15 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.3381266475327612, - "sentence_nr": 15 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.09910529437987022, - "sentence_nr": 15 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.2511990291834263, - "sentence_nr": 15 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.295394335805579, - "sentence_nr": 15 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.362515947701148, - "sentence_nr": 15 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.7243776840931383, - "sentence_nr": 15 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.8980107630353439, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 15 - }, - { - "model": 
"meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.9325718821645923, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.9490053815176721, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.6653044831075519, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.7986980418662383, - "sentence_nr": 15 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.8504591592783618, - "sentence_nr": 15 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.8980107630353439, - "sentence_nr": 15 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.5950322600507224, - "sentence_nr": 15 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.7090542316843602, - "sentence_nr": 15 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.44768974737795825, - "sentence_nr": 15 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.45520472994232203, - "sentence_nr": 15 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.6509298345623671, - "sentence_nr": 15 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.7962234681835563, - 
"sentence_nr": 15 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.41813929088914065, - "sentence_nr": 15 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.4779008399806691, - "sentence_nr": 15 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.7243776840931383, - "sentence_nr": 15 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.8642805496461259, - "sentence_nr": 15 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.9506885335787997, - "sentence_nr": 15 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.9606382935593174, - "sentence_nr": 15 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.8665175293126633, - "sentence_nr": 15 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.8642805496461259, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.342569723746894, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.47156710056973744, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.2319934375578505, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.3367678538644817, - "sentence_nr": 15 - }, - { - 
"model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.2261681529206079, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.2647144854968396, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.35554722872430145, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.38873710544604445, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.3720000272862786, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.44695658930348453, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.4118588818865406, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.48573453292579605, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.2998354233286452, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.41144215385645566, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.42142495511264777, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - 
"score": 0.49708063531780444, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.33296735510279596, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.4176386300927819, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.32522259162581857, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.3572499606049779, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.3449668516380805, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.4341194278942322, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.36161896085795575, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.5052818563161547, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.2798191316489921, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.31866179281073254, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.3170440263520106, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - 
"bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.45327673850268096, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.15538140800156827, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.22365453282977818, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.1352815632479558, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.2610624350708668, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.35907597395908514, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.433310273977633, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.33498522957587384, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.4529680464694055, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.30675389390381064, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.49190118767827684, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.13922661372145656, - "sentence_nr": 15 
- }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.27553494979330584, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.3515170550015674, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.37881852198491145, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.30950829536527374, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.3839157172568008, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.22141947821999777, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.3633108862011865, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.2957849631521743, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.2872269269040579, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.19474118932727338, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.3257294949902081, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - 
"metric": "bleu", - "score": 0.26505727008662233, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.41342120940573923, - "sentence_nr": 15 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.5489548889989204, - "sentence_nr": 16 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.5292552311493306, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.43141660874998483, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.4251732952639193, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.479859141564773, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.47978767796651084, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.2751349202729036, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.311148395820729, - "sentence_nr": 16 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.5435154526669127, - "sentence_nr": 16 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.5395341377171525, - "sentence_nr": 16 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - 
"task": "translation", - "metric": "bleu", - "score": 0.5777979902630328, - "sentence_nr": 16 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.6331337405946555, - "sentence_nr": 16 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.6121338866063298, - "sentence_nr": 16 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.6222767269627676, - "sentence_nr": 16 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.5440627210252523, - "sentence_nr": 16 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.5801365308278273, - "sentence_nr": 16 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.5097049681318312, - "sentence_nr": 16 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.5622473457673939, - "sentence_nr": 16 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.42567378467735034, - "sentence_nr": 16 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.470165978205223, - "sentence_nr": 16 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.47594607773277786, - "sentence_nr": 16 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.5363851621507516, - "sentence_nr": 16 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - 
"score": 0.4533373633026252, - "sentence_nr": 16 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.5042718376547173, - "sentence_nr": 16 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.8509306641805077, - "sentence_nr": 16 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.9162670716850285, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.9682566771439106, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.9779127328168863, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.7040822331405046, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.7673268835807536, - "sentence_nr": 16 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.8509306641805077, - "sentence_nr": 16 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.9162670716850285, - "sentence_nr": 16 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 
0.7639225615341296, - "sentence_nr": 16 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.8135226479972402, - "sentence_nr": 16 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.6729400620282456, - "sentence_nr": 16 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.7157738382386983, - "sentence_nr": 16 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 16 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 16 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.6736973998414632, - "sentence_nr": 16 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.7157738382386983, - "sentence_nr": 16 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.7640211005075139, - "sentence_nr": 16 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.8179683170395244, - "sentence_nr": 16 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 16 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 16 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.8509306641805077, - "sentence_nr": 16 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - 
"task": "translation", - "metric": "chrf", - "score": 0.9162670716850285, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.43141660874998483, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.45005622460103567, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.5269212212163125, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.5528502361092263, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.6736973998414632, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.7157738382386983, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.33491174038847354, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.3646077683106875, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.450293182440332, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.4822292034174927, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.19834633509680927, - "sentence_nr": 16 - }, - { - "model": 
"meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.2712763621688402, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.546749262754264, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.5830342194369027, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.2754139367364165, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.34665831783057166, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.42877544777223947, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.43803970127356867, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.43908893511874636, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.4785460996828672, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.5898466143484524, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.6611594562951559, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 
0.44701416909786756, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.5245065297475329, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.31417347869916407, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.3530975487930333, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.6373258340947424, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.6437421244363288, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.4715455630189013, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.543275675805182, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.2807304798995431, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.3418543172008782, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.5397682182130759, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.5703951757357331, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - 
"task": "translation", - "metric": "bleu", - "score": 0.5446420954986508, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.5662782206307382, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.3378721588486122, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.4362453299175689, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.49288474585647657, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.5578180330951528, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.36197274748300795, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.36134314178088084, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.17060055774694924, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.2566677182784047, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.5717883675148524, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.640780099960748, - "sentence_nr": 16 - }, - { - "model": 
"meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.221071468018936, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.41620491059292214, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.4263215396273059, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.3711481893609263, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.4101392170618868, - "sentence_nr": 16 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.7838756540325346, - "sentence_nr": 17 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.8813081534414112, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.6486802664285581, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.8066891982024211, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.7344798528986015, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.8855631322316195, - "sentence_nr": 17 - }, - { 
- "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.6486802664285581, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.8585894188661937, - "sentence_nr": 17 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.7838756540325346, - "sentence_nr": 17 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.8813081534414112, - "sentence_nr": 17 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.8434569599214109, - "sentence_nr": 17 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.9123500588239437, - "sentence_nr": 17 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 17 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 17 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.7849324644314795, - "sentence_nr": 17 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.8934780380564308, - "sentence_nr": 17 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.7838756540325346, - "sentence_nr": 17 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.8799941663695641, - "sentence_nr": 17 - }, - { - "model": 
"qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 17 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 17 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.6809354000776107, - "sentence_nr": 17 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.8640242853252401, - "sentence_nr": 17 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.7838756540325346, - "sentence_nr": 17 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.8841725044915145, - "sentence_nr": 17 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.39503194300684213, - "sentence_nr": 17 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.6916289318228928, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.3094285625931604, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.6328843883953666, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.30888995556875376, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.6801864286113619, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": 
"translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5512199399393973, - "sentence_nr": 17 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.45862256824436665, - "sentence_nr": 17 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.7660160731572102, - "sentence_nr": 17 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.47770079267358434, - "sentence_nr": 17 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.8053780976175922, - "sentence_nr": 17 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.6259358824502687, - "sentence_nr": 17 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.8067950339997761, - "sentence_nr": 17 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.5296344689827603, - "sentence_nr": 17 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.7183083787484315, - "sentence_nr": 17 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.7568440125092788, - "sentence_nr": 17 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.8347576899702969, - "sentence_nr": 17 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": 
"bleu", - "score": 0.3001800600660342, - "sentence_nr": 17 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.6794930944968381, - "sentence_nr": 17 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.18879642915927602, - "sentence_nr": 17 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.6584653291380502, - "sentence_nr": 17 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.4697979053121435, - "sentence_nr": 17 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.7554660353280213, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.3164389365959547, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.7121929522648841, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.6031798395521694, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.7819677495994619, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.5646631238098637, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.836206348617966, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - 
"metric": "bleu", - "score": 0.36615107686578496, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.696074520676609, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.1543252261021413, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.4932064977882042, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.6966863379186454, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.7941296295595748, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.5487584440377526, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.8692797308530646, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.8787142254774354, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.944457825946867, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - 
"task": "translation", - "metric": "bleu", - "score": 0.5463887965663883, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.7033378749149323, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.4912217876159168, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.7991339910300419, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.7251215108320924, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.8334871013677937, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.587725019570444, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.7957550794048827, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.28856268147560865, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.6187787024786685, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.4402122771181734, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.7716344099519011, - "sentence_nr": 17 - }, - { - "model": 
"meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.18465966669442654, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.503938463452404, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.17973438065210462, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.5509051817440759, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.4809103179432793, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.7499547288317748, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.6244070585346295, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.8433626077474702, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.43660156107563336, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.7165816705519701, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.3748533897614559, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 
0.6863935447402433, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.3607442374649342, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.6876955247522804, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.3718491333506089, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.6941552634040441, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.5110976370499285, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.842915559657988, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.5591535564944223, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.8079980831297509, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.11809858631445573, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.5943886568930294, - "sentence_nr": 17 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.1423170365140828, - "sentence_nr": 18 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - 
"metric": "chrf", - "score": 0.38605131339325, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.3230989128220882, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.13860487750886114, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.36659667376085786, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.36295227908523897, - "sentence_nr": 18 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.13860487750886114, - "sentence_nr": 18 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.36118801210741663, - "sentence_nr": 18 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 18 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.40877861250593944, - "sentence_nr": 18 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.16673024281943524, - "sentence_nr": 18 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": 
"chrf", - "score": 0.3975048254243706, - "sentence_nr": 18 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.11262865194228103, - "sentence_nr": 18 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.36030161445252334, - "sentence_nr": 18 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 18 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.3066941236048102, - "sentence_nr": 18 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 18 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 18 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 18 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.409404483413751, - "sentence_nr": 18 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 18 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.3629681915617596, - "sentence_nr": 18 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.12162779391619735, - "sentence_nr": 18 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.4136500403395244, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - 
"task": "translation", - "metric": "bleu", - "score": 0.1909693288724605, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.4115524982336727, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.14192760409508295, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.3989311390496819, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.20304460086424203, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.4966336271433132, - "sentence_nr": 18 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 18 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.3935462418730863, - "sentence_nr": 18 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 18 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.33523829330170474, - "sentence_nr": 18 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 18 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.3250861966671464, - "sentence_nr": 18 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": 
"translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 18 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.3051626462022859, - "sentence_nr": 18 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 18 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.30944349609311117, - "sentence_nr": 18 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.11556522074454477, - "sentence_nr": 18 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.372688132616477, - "sentence_nr": 18 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.22392361812003433, - "sentence_nr": 18 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.460938469666163, - "sentence_nr": 18 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.10704943109718215, - "sentence_nr": 18 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.362953271903766, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.14392660099814805, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.376362134090542, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 1.0, - 
"sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.11718316363212337, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.3844506520287143, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.4024646900219184, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.08197539732074254, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.35287478964221025, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.3502198678697797, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.376636825008991, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 18 - 
}, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.30372034137078635, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.21481172921264619, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.4009028477501074, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.15065778147399764, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.4580508275161034, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.21281360709834968, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.4292702902558381, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.13780534982274106, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.3273034480518148, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.36078900962911326, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - 
"score": 0.2491467453273127, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.47986445165634506, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.10905122148101043, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.4502571446121065, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.274959074733397, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.3607206140473947, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.17796237395371306, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.48209511527864385, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.13644487773607678, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.36491236604183974, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": 
"translation", - "metric": "bleu", - "score": 0.21850577875478958, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.4494281444270959, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.31361999490423276, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.1222354265296326, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.3727252294250617, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.1109484758001971, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.3612426584883393, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.20356858406857398, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.46358366365120834, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.11530762783711283, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.3781690117672006, - "sentence_nr": 18 - }, - { - "model": 
"meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.11907182322580316, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.49599003474365394, - "sentence_nr": 18 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.4220964985804286, - "sentence_nr": 19 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.4455062898838481, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.32026140564476524, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.4016870075045671, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.34697616124581016, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.40373943351486685, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 19 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.4184617303786878, - "sentence_nr": 19 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.4321132548050678, - "sentence_nr": 19 - }, - { - 
"model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.3499900041521066, - "sentence_nr": 19 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.3822330369569219, - "sentence_nr": 19 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.4220964985804286, - "sentence_nr": 19 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.4455062898838481, - "sentence_nr": 19 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.41428013900466737, - "sentence_nr": 19 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.425713879206717, - "sentence_nr": 19 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.4184617303786878, - "sentence_nr": 19 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.4321132548050678, - "sentence_nr": 19 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.5102296603076779, - "sentence_nr": 19 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.5412065437629714, - "sentence_nr": 19 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.32282559495424096, - "sentence_nr": 19 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.38266426308756574, - "sentence_nr": 19 - }, - { - "model": "amazon/nova-micro-v1", - 
"bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.4230074457298372, - "sentence_nr": 19 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.4432451111759523, - "sentence_nr": 19 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 19 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.6363676859401174, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.6744544901797789, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 19 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 19 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 19 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 19 - }, - { - 
"model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 19 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.9271746317040298, - "sentence_nr": 19 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.9736668125871423, - "sentence_nr": 19 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 19 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 19 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.6986939462620247, - "sentence_nr": 19 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.7821077250864037, - "sentence_nr": 19 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 19 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 19 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 19 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 19 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.9184678024441792, - "sentence_nr": 19 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.8884834862973964, - "sentence_nr": 19 - }, 
- { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.3797391466432489, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.3481158447116987, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.31102805827817165, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.3375837027261476, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.19710660977672484, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.2646181750020499, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.3797391466432489, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.3274816319655301, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.28493958837889694, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.35876163607595707, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.2485364833746714, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", 
- "score": 0.2873862688213756, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.41664461891968263, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.42600414573009276, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.2710684964643971, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.2982841390442802, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.23005567239800093, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.29184715566281483, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.2741455993358603, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.36403543443534025, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.34279101776553306, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.42600414573009276, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.30955822779938535, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - 
"bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.39546682876478195, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.39475108115635776, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.42154888635191134, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.2781617026804374, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.32302333182207527, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.08473168573832755, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.25650903369815853, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.2883871807684295, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.21660761852515356, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.25414220830184964, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.32910644083871465, - "sentence_nr": 19 - }, - { - 
"model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.29306886812256966, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.18084108219203518, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.27583433958197495, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.25612947694888455, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.3002607987321696, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.3216291288446239, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.4272249853925079, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.188590266789637, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.26177705380820604, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.3308736026652116, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.3875427536757155, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - 
"score": 0.28432597056103653, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.35944124408933287, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.23631465024334478, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.2692006325646732, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.259615032947222, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.2855780701161316, - "sentence_nr": 19 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.31343233007308363, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.28662182336952924, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.289946670354745, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - 
"sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.2585958231966256, - "sentence_nr": 20 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.1574562620502688, - "sentence_nr": 20 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.2833933092608246, - "sentence_nr": 20 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.2330649391612961, - "sentence_nr": 20 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.2858508520944113, - "sentence_nr": 20 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.17248469309075373, - "sentence_nr": 20 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.3673041887389201, - "sentence_nr": 20 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.28838937143148047, - "sentence_nr": 20 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 20 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - 
"task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 20 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.25480888745972646, - "sentence_nr": 20 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.14839290005301392, - "sentence_nr": 20 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.29565285341782266, - "sentence_nr": 20 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.22266775943086, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.10508106635796587, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.2504422832248121, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.22563365567811913, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.12913533075470382, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 
0.24776496881674256, - "sentence_nr": 20 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.08680476715745516, - "sentence_nr": 20 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.22066482174709295, - "sentence_nr": 20 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 20 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 20 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.12117880855911824, - "sentence_nr": 20 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.32137825349405363, - "sentence_nr": 20 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 20 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 20 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - 
"metric": "bleu", - "score": 0.0, - "sentence_nr": 20 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.20104685618767446, - "sentence_nr": 20 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.25137213099939626, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.28372673673489807, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.164799256779143, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.32187376249458133, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.2969522070783606, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.14440617372843148, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.27200704330334224, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - 
"score": 0.0, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.2442053369522631, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.33050427873462274, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.2133219421911448, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.3424665224706109, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.8944054777319608, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.24197054442617688, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.21682999057776514, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.3722897460532404, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - 
"score": 0.0, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.24424323100599224, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.2205591704292585, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.3479467223515336, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.1926917267834754, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.4545444680350158, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.17580772500133016, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.32957763052496886, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 
0.2148084015365523, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.40974307981059804, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.29622141199363383, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.24146688269469918, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.09958408398703665, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.22890983822248492, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.20795712301883962, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.282761705091657, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 
0.0, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.2551114536415265, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.18112053860965763, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.3266298821510716, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.1423412184218882, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.26467729752192487, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.286072901441292, - "sentence_nr": 20 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 21 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.2851456053265138, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.09858834583812252, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.7445389400758123, - 
"sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.9134769668037408, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.2506297252541463, - "sentence_nr": 21 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 21 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 21 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.8320381765431424, - "sentence_nr": 21 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.9129044064886581, - "sentence_nr": 21 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 21 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 21 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 21 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.23443139907396643, - "sentence_nr": 21 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 21 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - 
"task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 21 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 21 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 21 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 21 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 21 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 21 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 21 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 21 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.29972668857564216, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.12409597120849801, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.2928237514438983, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.15083364266523736, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.38662429787924074, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - 
"bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.22849324967229787, - "sentence_nr": 21 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 21 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.44152236347960977, - "sentence_nr": 21 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 21 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.2989569143807341, - "sentence_nr": 21 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 21 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 21 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 21 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 21 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 21 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.4042166909648807, - "sentence_nr": 21 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 21 - }, - { - "model": "qwen/qwq-32b", 
- "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.3423939053207622, - "sentence_nr": 21 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 21 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.17611268473423294, - "sentence_nr": 21 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 21 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.20441543914149457, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.18928624746011372, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.43639616127375797, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.29213008358451265, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.5828788445270403, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.14679869139754204, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.4021419566569229, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.329340597116918, - "sentence_nr": 21 - }, - { - "model": 
"meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.6347143291802012, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.2868708266227936, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.5779499593492363, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.3436610762802303, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.2782087319667435, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.632418768195088, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.3083012995502152, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.6392851743718383, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.23050898626566632, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.48172150010681464, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.1969221590285716, - 
"sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.5644899370701738, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.32594818888335836, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.49646222671189383, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.4604008032403599, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.7444026788985108, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.29161716271402766, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.47302621872495865, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.6854823532900025, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.3546725638586892, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - 
"score": 0.21468316165048362, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.6851126041819388, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.250737833894674, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.40017617077306594, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.27204995504877727, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.2743963944428051, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.6341922683775969, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.7252122374710612, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.12586347848916266, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.3554854950683664, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": 
"translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.3889045463729729, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.20229280648000492, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.6194717199605934, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.31114459650134146, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.11856660123276004, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.34601719602607445, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.40072710492884706, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.7206046648616748, - "sentence_nr": 21 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.22174147515312165, - "sentence_nr": 22 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.2117279815687756, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - 
"task": "translation", - "metric": "bleu", - "score": 0.33999170096577974, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.29221353951377876, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.3058731661111107, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.2747352174231836, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 22 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.42736771185803385, - "sentence_nr": 22 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.39727964545172, - "sentence_nr": 22 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.10975022749274138, - "sentence_nr": 22 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.13904829787402162, - "sentence_nr": 22 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.2873518361947954, - "sentence_nr": 22 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.24505805183333226, - "sentence_nr": 22 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": 
"en", - "task": "translation", - "metric": "bleu", - "score": 0.33495074569972355, - "sentence_nr": 22 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.3454509072842772, - "sentence_nr": 22 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.43090467385890824, - "sentence_nr": 22 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.3813511699401743, - "sentence_nr": 22 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 22 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 22 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.22765977642995502, - "sentence_nr": 22 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.2247283208344801, - "sentence_nr": 22 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.30931906627981315, - "sentence_nr": 22 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.2527893205238235, - "sentence_nr": 22 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 22 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 22 - }, - { - "model": 
"meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.9210500207490827, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.9069369532463243, - "sentence_nr": 22 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 22 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 22 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 22 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 22 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 22 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 22 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 22 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 1.0, - 
"sentence_nr": 22 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.4607778969984477, - "sentence_nr": 22 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.8103868370118212, - "sentence_nr": 22 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 22 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 22 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 22 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 22 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.4885014761119101, - "sentence_nr": 22 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.827819363745503, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.17903870455040152, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.18440575845606422, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.1981763713215807, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.2520139548059959, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - 
"task": "translation", - "metric": "bleu", - "score": 0.17499310607879404, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.18175908515502465, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.10089587713517954, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.11552870044063634, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.3168035112884022, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.30580678632835573, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.10825039887617824, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.1278708456868984, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.29705138694670025, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.2780223931578523, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.18986262747887736, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.18230825914917978, - "sentence_nr": 22 - }, - { - 
"model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.42442305789888696, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.42734795538422576, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.18781316135387768, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.16808430602651067, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.3454156644973841, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.30446460704247824, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.4479597674250984, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.41132840401983517, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.10704445941620296, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.13527356658034445, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.09941527806251362, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", 
- "score": 0.13609735884978696, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.19230259308735756, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.22211286692050705, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.20383889880388334, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.17813562619757226, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.2986551380628858, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.30308773908860176, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.10536111661637193, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.13679626017050403, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.19732230687816163, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.22765162763479738, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.3987203877706927, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", 
- "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.39992851145514274, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.09467800236923245, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.11434380596647938, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.15034676904545285, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.36138016740101575, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.31224382417562974, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.345966570287759, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.2816115803298224, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.3461146475963348, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.30131374176129855, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.1552102601937674, - "sentence_nr": 22 - }, - 
{ - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.1381803727119777, - "sentence_nr": 22 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.4967067363118649, - "sentence_nr": 23 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.6330776418175281, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.39501632817024007, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.5629116515332234, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.44774758283371513, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.6323151453499094, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.3353166764160673, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.5279751808070301, - "sentence_nr": 23 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.3340392563357978, - "sentence_nr": 23 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.5542299582982266, - "sentence_nr": 23 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.2288355034549531, - 
"sentence_nr": 23 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.469883747317403, - "sentence_nr": 23 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.5472915485853102, - "sentence_nr": 23 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.7136367183558585, - "sentence_nr": 23 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.6159995640523437, - "sentence_nr": 23 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.8398584608765305, - "sentence_nr": 23 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.5155625728615272, - "sentence_nr": 23 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.6435263800797054, - "sentence_nr": 23 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 23 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 23 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 23 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.32206162101132135, - "sentence_nr": 23 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.24125880497129865, - "sentence_nr": 23 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": 
"translation", - "metric": "chrf", - "score": 0.47825499190432214, - "sentence_nr": 23 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.3292010361291119, - "sentence_nr": 23 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5670300297444607, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.46086624699736534, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.6510894943437193, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.5511532346688224, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.7550305399541021, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.34537865578685034, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5956718372193373, - "sentence_nr": 23 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.39080227521872696, - "sentence_nr": 23 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.621048393466749, - "sentence_nr": 23 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.2755396296659942, - "sentence_nr": 23 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": 
"zh", - "task": "translation", - "metric": "chrf", - "score": 0.5033588333252278, - "sentence_nr": 23 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.5795086255869999, - "sentence_nr": 23 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.7183582779188291, - "sentence_nr": 23 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.6214211316495574, - "sentence_nr": 23 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.7844755306149331, - "sentence_nr": 23 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.6008383045972477, - "sentence_nr": 23 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.7291842011448325, - "sentence_nr": 23 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 23 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 23 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 23 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.25418196696822093, - "sentence_nr": 23 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 23 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5117784549266909, - "sentence_nr": 23 - }, - { 
- "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.274941620352113, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.4651004879148919, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.22743363869750483, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.5634710936922129, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.518836150464752, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.6242496691584447, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.5989032124636781, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.7291306908177887, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.2677353447271197, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.4460422364967209, - 
"sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.3558785149067877, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.570837784052645, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.2624310277292268, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.4915471393606767, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.28489318277723963, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.6000278331909762, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.5728668995816387, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.7460634178179616, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.46086624699736534, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.6510894943437193, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.46507550803536196, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": 
"translation", - "metric": "chrf", - "score": 0.6687857543858925, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.5155625728615272, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.6435263800797054, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.3639412530979476, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.654342605671994, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.19882981891203355, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.45714526865696425, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.32269274420690436, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.49704406859630557, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.42849655626964983, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.662646931303495, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.3937441173550755, - "sentence_nr": 23 - }, - { - "model": 
"meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.5600824723479425, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.23114663823833642, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.5107406700140826, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.419793811546288, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.6152785242440109, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.46086624699736534, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.6510894943437193, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.46507550803536196, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.6687857543858925, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.2296660762967038, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.5259172094145851, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 
0.39501632817024007, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.5505822266189535, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.3215000448278979, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.5947774549102596, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.20870371467330825, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.40726160697608454, - "sentence_nr": 23 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.3460579711860666, - "sentence_nr": 24 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.43910565102067395, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.16692770661327389, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.2940239540182693, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.17589867762235817, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.2991014535844428, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - 
"metric": "bleu", - "score": 0.15568794672327907, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.30284457998681635, - "sentence_nr": 24 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.17340302865304977, - "sentence_nr": 24 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.28581037214602456, - "sentence_nr": 24 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.1782509297990519, - "sentence_nr": 24 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.28710039249342334, - "sentence_nr": 24 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.4901491669500622, - "sentence_nr": 24 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.5638035394617603, - "sentence_nr": 24 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.3460579711860666, - "sentence_nr": 24 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.4260473803699743, - "sentence_nr": 24 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.2011131382865372, - "sentence_nr": 24 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.36314253622836745, - "sentence_nr": 24 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - 
"metric": "bleu", - "score": 0.17598839092477797, - "sentence_nr": 24 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.28650792027744043, - "sentence_nr": 24 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 24 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.15997462319973554, - "sentence_nr": 24 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.24731742205813823, - "sentence_nr": 24 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.3980108204104697, - "sentence_nr": 24 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.5611872124508993, - "sentence_nr": 24 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.7431443902355421, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.4465866985385432, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.6260699913485588, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.4465866985385432, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.6260699913485588, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 
0.25509991414681377, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.505614827211273, - "sentence_nr": 24 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.21452424426866915, - "sentence_nr": 24 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.44780791445343104, - "sentence_nr": 24 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.23857086413632697, - "sentence_nr": 24 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.47971483823439903, - "sentence_nr": 24 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.446411600799131, - "sentence_nr": 24 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5816697577563045, - "sentence_nr": 24 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.4664526119731094, - "sentence_nr": 24 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.6399376431552989, - "sentence_nr": 24 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.20156032858716424, - "sentence_nr": 24 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.4855075115512445, - "sentence_nr": 24 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 
0.1526900266679129, - "sentence_nr": 24 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.41716995830580594, - "sentence_nr": 24 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 24 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.23259933287371404, - "sentence_nr": 24 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.20835831728362864, - "sentence_nr": 24 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.49812931259693377, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.17334119484500185, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.31463785312250736, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.12522096513057643, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.335302418196347, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.9100527513271326, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.9584484214161733, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 
0.20156032858716424, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.35007862377558696, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.3449632275226908, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.5000457205552167, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.15568794672327907, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.30284457998681635, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.1529699053146309, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.35702516223197556, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.15975615838102766, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.16928451900289662, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.40173762794247314, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": 
"translation", - "metric": "bleu", - "score": 0.15568794672327907, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.30284457998681635, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.16038844415635037, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.30359085570641314, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.3595283251171754, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.5790446318474887, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.20563705341552085, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.3762774944524412, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.16692770661327389, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.2940239540182693, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.14165832410287266, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.24107149684266257, - "sentence_nr": 24 - }, - { - "model": 
"meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.1258646065963102, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.24857006332411635, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.2519649154562495, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.44974180175388206, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.3253958243003269, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.45173371737296786, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.27618177741751665, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.4305107132988055, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.1683625745315614, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.31167225759119427, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.13728361101885644, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 
0.3436250633828196, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.16353712933127018, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.32934735468962634, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.48680589893384085, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.6190257724123215, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.1551293035275564, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.2674082220133274, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.26091874007348304, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.17598839092477797, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.28650792027744043, - "sentence_nr": 24 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.1332399603607437, - "sentence_nr": 25 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": 
"chrf", - "score": 0.19971937750838645, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.1834283688193615, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.22588088032876846, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.12425342874478343, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.1660533764831914, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.15538689193055893, - "sentence_nr": 25 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.14158209035366248, - "sentence_nr": 25 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.1869416235999822, - "sentence_nr": 25 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 25 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.0578819658044546, - "sentence_nr": 25 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.16170596160446446, - "sentence_nr": 25 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - 
"metric": "chrf", - "score": 0.2206817446345091, - "sentence_nr": 25 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.14914968848461002, - "sentence_nr": 25 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.21702090583674813, - "sentence_nr": 25 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 25 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.138685682297543, - "sentence_nr": 25 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 25 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 25 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 25 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.1258687317121735, - "sentence_nr": 25 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 25 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.1327332961698289, - "sentence_nr": 25 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.23556366957615363, - "sentence_nr": 25 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.22894370639738668, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": 
"zh", - "task": "translation", - "metric": "bleu", - "score": 0.16684195647378827, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.21420692177337528, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.33150414660895594, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.30808679013173407, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.23556366957615363, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.25521078373566897, - "sentence_nr": 25 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.14257880024595157, - "sentence_nr": 25 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.1979524022915653, - "sentence_nr": 25 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.14257880024595157, - "sentence_nr": 25 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.1979524022915653, - "sentence_nr": 25 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.260711748598298, - "sentence_nr": 25 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.28143225165615565, - "sentence_nr": 25 - }, - { - "model": 
"google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.25621420675166556, - "sentence_nr": 25 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.32613185963061736, - "sentence_nr": 25 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.21310996044302127, - "sentence_nr": 25 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.2620829676028965, - "sentence_nr": 25 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.08892786873926031, - "sentence_nr": 25 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.14069122234920528, - "sentence_nr": 25 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.12273033502938982, - "sentence_nr": 25 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.15070376710164984, - "sentence_nr": 25 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.17376029392152273, - "sentence_nr": 25 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.22421987263715565, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.07369293827420972, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - 
"bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.08728042965046878, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.12416744870990627, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.06452498627127952, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.09758509152849626, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.09985298970743903, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.22158794642706012, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.20787168962643957, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", 
- "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.05401240601013853, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.07243671671799473, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.1543646468773244, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.09348998462584433, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.130990604448226, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.09885362316286796, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.15900429623613993, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.10903227170832805, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.11481934989482791, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.1745453831609756, - "sentence_nr": 25 - }, - { - "model": 
"meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.046916282267844764, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.1250076305588977, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.12985392271660248, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.06737080019124615, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.18629057860741663, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.1504281768235603, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.11099491388125307, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.1201070010200949, - "sentence_nr": 25 - }, - { - "model": 
"meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.08702826664587757, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.42262353460370816, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.3966051357904673, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.09612004569821603, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.10249207815381514, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.1341907303110576, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.11635402454082566, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.1636348970852316, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.06028131279303415, - "sentence_nr": 25 
- }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.0901676620993871, - "sentence_nr": 25 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.6983671476675032, - "sentence_nr": 26 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.6697193437120026, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.5809024483660724, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.5409616569206442, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.5893051076561628, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.555242666304663, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.5197038614969076, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.4944106522194635, - "sentence_nr": 26 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.5863087308455573, - "sentence_nr": 26 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.5756247354842696, - 
"sentence_nr": 26 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.44763438063632005, - "sentence_nr": 26 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.4327706284829231, - "sentence_nr": 26 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.4562933372999328, - "sentence_nr": 26 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.4354000091116894, - "sentence_nr": 26 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.650945489442927, - "sentence_nr": 26 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.6025447507087655, - "sentence_nr": 26 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.5040260890269513, - "sentence_nr": 26 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.48159079549233025, - "sentence_nr": 26 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.3966338449810425, - "sentence_nr": 26 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.3940867714969907, - "sentence_nr": 26 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.3186669369694382, - "sentence_nr": 26 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.34867169182256896, - "sentence_nr": 26 - }, - { - "model": 
"amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.6350785093832516, - "sentence_nr": 26 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.6188888500556722, - "sentence_nr": 26 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 26 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 26 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 26 - }, - { - 
"model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.7997394936755756, - "sentence_nr": 26 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.7811228513409922, - "sentence_nr": 26 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.9660854289024723, - "sentence_nr": 26 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.9613867167137871, - "sentence_nr": 26 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.7158159753911548, - "sentence_nr": 26 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.7127947486849641, - "sentence_nr": 26 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.6813410498464633, - "sentence_nr": 26 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.6671821168913319, - "sentence_nr": 26 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 26 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 26 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 
1.0, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.45066539224706753, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.4254592023616511, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.45779216736532874, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.40945502186629257, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.40071581088356767, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.36844216279073794, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.14609848125563302, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.18504017619904287, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.4184317523303411, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.40500270963162277, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.4125433652059801, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": 
"translation", - "metric": "chrf", - "score": 0.3955923992862865, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.47182538941865537, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.42450279333172475, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.46492333059956836, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.4401112788616263, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.3967795858478363, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.3803134453035716, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.34915707707242977, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.34988691421168616, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.2613611691981996, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.2740054517113319, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.5600863252474344, - "sentence_nr": 26 - }, - { - "model": 
"meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.5179797138258272, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.3461243385522883, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.3560268535895035, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.43650008892828823, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.42551924250056755, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.504580863725975, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.46703102558879955, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.36954961729302616, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.34760122558190465, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.3803026331533805, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.362200056491149, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 
0.24777987943516128, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.2952194113831596, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.5258092834799059, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.4981801549352249, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.48625052891235754, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.4290939038872796, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.4045007320789693, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.4098113348256027, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.44158642009003995, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.40903259597127894, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.4946406341236379, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.4719975064311173, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": 
"fil", - "task": "translation", - "metric": "bleu", - "score": 0.41182432358851845, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.4034715718148006, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.3693186725771347, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.36304188784855995, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.3692675983091899, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.37402683054534963, - "sentence_nr": 26 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.7645786047678913, - "sentence_nr": 27 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.8655501219338723, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.8643729226327672, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.9215030582508996, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.8643729226327672, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.9215030582508996, - "sentence_nr": 27 - }, - { - "model": 
"meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.7858164289172753, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.8872272977237059, - "sentence_nr": 27 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.8643729226327672, - "sentence_nr": 27 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.9215030582508996, - "sentence_nr": 27 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.6898913050782208, - "sentence_nr": 27 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.8620687741940413, - "sentence_nr": 27 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.6898913050782208, - "sentence_nr": 27 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.8528837782425732, - "sentence_nr": 27 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.7708719635370461, - "sentence_nr": 27 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.8793197587693242, - "sentence_nr": 27 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.7708719635370461, - "sentence_nr": 27 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.888538633093067, - "sentence_nr": 27 - }, - { - 
"model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.6840689169974626, - "sentence_nr": 27 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.8314419144081646, - "sentence_nr": 27 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.5819799380263497, - "sentence_nr": 27 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.7407958979814505, - "sentence_nr": 27 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.7645786047678913, - "sentence_nr": 27 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.8655501219338723, - "sentence_nr": 27 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.41098733201100757, - "sentence_nr": 27 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.651283133493195, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.6152755816095169, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.7669297251133314, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.4250002996145258, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.6670552714553488, - "sentence_nr": 27 - }, - { - "model": 
"meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.3735617779670567, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5773479111816255, - "sentence_nr": 27 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.5543498698280007, - "sentence_nr": 27 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.7266847297604082, - "sentence_nr": 27 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.3716332023564544, - "sentence_nr": 27 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.6132388888021502, - "sentence_nr": 27 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.6986939462620247, - "sentence_nr": 27 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.8497711598086016, - "sentence_nr": 27 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.5072570733389083, - "sentence_nr": 27 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.7124868368374351, - "sentence_nr": 27 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.5907596734005102, - "sentence_nr": 27 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.7837270250239556, - "sentence_nr": 27 - }, - { - 
"model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.10008881112800158, - "sentence_nr": 27 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.29125356488795046, - "sentence_nr": 27 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 27 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.041649157343430596, - "sentence_nr": 27 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.6587480145435196, - "sentence_nr": 27 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.7917841426705801, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.7446828000198126, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.885521980076414, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.6466833757622275, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.7737914417145209, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.4447278656331358, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.6742569711624775, - "sentence_nr": 27 - }, - { - "model": 
"meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.600047216971444, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.7511423755179258, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.3382340617900419, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.6182585373365673, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.6069548573053054, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.7630436854704967, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.40482952759410495, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.6241130944295542, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.5021718181363274, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.697189669759932, - "sentence_nr": 
27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.7858164289172753, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.8717639062922423, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.5731680012014568, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.746935173521359, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.7224037170215811, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.8452672523905139, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.5724496367057007, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.7350859720106757, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.42250552136302394, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.6425389837629188, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.7645048342610411, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": 
"chrf", - "score": 0.876234192352485, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.45751787171307623, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.6647794363792763, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.7623067286250759, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.8682092620191191, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.46189821859121283, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.6442319235751083, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.3931991982536581, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.6422735790483707, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.44644290381704027, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.6892051604181435, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.4000177797533498, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", 
- "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.645169701736652, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.4479818542603719, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.6761961025641056, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.7123871749204508, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.8331784519293958, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.5749089871602278, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.7211428196508521, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.38506289173931413, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.6152360906748179, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.6231488481063673, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.7734960210241439, - "sentence_nr": 27 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.4460616097899727, - "sentence_nr": 28 - }, - { - 
"model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.6833569517560225, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.4460616097899727, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.6833569517560225, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.4460616097899727, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.693261298341864, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.4460616097899727, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.693261298341864, - "sentence_nr": 28 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.6745016003476486, - "sentence_nr": 28 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.8511670783317596, - "sentence_nr": 28 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.32329508170352383, - "sentence_nr": 28 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.6141330847741713, - "sentence_nr": 28 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.4460616097899727, - 
"sentence_nr": 28 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.6833569517560225, - "sentence_nr": 28 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.3837983925863447, - "sentence_nr": 28 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.6366757448341102, - "sentence_nr": 28 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.6745016003476486, - "sentence_nr": 28 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.8511670783317596, - "sentence_nr": 28 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 28 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 28 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.6667025833042813, - "sentence_nr": 28 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.746973053424487, - "sentence_nr": 28 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.6745016003476486, - "sentence_nr": 28 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.8511670783317596, - "sentence_nr": 28 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.217554942150074, - "sentence_nr": 28 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": 
"translation", - "metric": "chrf", - "score": 0.4859163400220353, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.3008656294855478, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5198655773563042, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.3008656294855478, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5198655773563042, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.203264842568494, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.4965705242699611, - "sentence_nr": 28 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.32079058840140134, - "sentence_nr": 28 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5094305382960898, - "sentence_nr": 28 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.23693055763743093, - "sentence_nr": 28 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.4968400811224627, - "sentence_nr": 28 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.32079058840140134, - "sentence_nr": 28 - }, - { - "model": 
"google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5094305382960898, - "sentence_nr": 28 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.35479105265934485, - "sentence_nr": 28 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.4725761870926308, - "sentence_nr": 28 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.3301899334885226, - "sentence_nr": 28 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5632801217523468, - "sentence_nr": 28 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 28 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 28 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.1923904871441659, - "sentence_nr": 28 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5825915593253297, - "sentence_nr": 28 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.32079058840140134, - "sentence_nr": 28 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5094305382960898, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.4892199210635081, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": 
"translation", - "metric": "chrf", - "score": 0.6263002679299042, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.09147827112247602, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.3360691966057836, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.2966218714191134, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.5348497180679597, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.3008656294855478, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.5209701084013916, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.3254074668234594, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.540582703782851, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.22935466869603194, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.6357138961264384, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.4460616097899727, - "sentence_nr": 28 - }, - { - "model": 
"meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.6833569517560225, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.38769943713308697, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.6179897670313796, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.35964066074252593, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.5418421848087059, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.31666472263798334, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.5096984883597744, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.2656621439255861, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.47187800221660153, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.41583634222861793, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.6558319092753532, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 
0.26633048164380024, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.5038200170930055, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.5371525807924681, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.7677378485184402, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.15274299622833287, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.4692950277268683, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.30626101600123445, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.583891679561264, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.18137691349228668, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.4586072719105437, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.23443677523946913, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.5163278972706644, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", 
- "task": "translation", - "metric": "bleu", - "score": 0.33876931708826047, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.550413577565279, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.14207405313947058, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.47874702297210975, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.2539342198718324, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.46375067718601715, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.200726550812963, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.41645295439394076, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.42995245074388394, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.6515566568079457, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.2834052290575623, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.4974109921343301, - "sentence_nr": 28 - }, - { - 
"model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.19454290935168927, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.49909763892228687, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.3837983925863447, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.6379993550810827, - "sentence_nr": 28 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.1481394578697113, - "sentence_nr": 29 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.30063818852404856, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.14216645907653844, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.2737034564138708, - "sentence_nr": 29 - }, - { - "model": 
"mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.7778111223054219, - "sentence_nr": 29 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.8190064480412373, - "sentence_nr": 29 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.7778111223054219, - "sentence_nr": 29 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.8190064480412373, - "sentence_nr": 29 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.7778111223054219, - "sentence_nr": 29 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.8190064480412373, - "sentence_nr": 29 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.14939354788683526, - "sentence_nr": 29 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.29041654772860626, - "sentence_nr": 29 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.7778111223054219, - "sentence_nr": 29 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.8190064480412373, - "sentence_nr": 29 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 29 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 29 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - 
"task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 29 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 29 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.7778111223054219, - "sentence_nr": 29 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.8190064480412373, - "sentence_nr": 29 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.4125519163596689, - "sentence_nr": 29 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5539867049403877, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.5420662441541858, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5445089463670787, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.40919282596076484, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5542936932152527, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.4125519163596689, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5539867049403877, - "sentence_nr": 29 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - 
"metric": "bleu", - "score": 0.4125519163596689, - "sentence_nr": 29 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5539867049403877, - "sentence_nr": 29 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.5928902071159559, - "sentence_nr": 29 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.647817438132439, - "sentence_nr": 29 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.5928902071159559, - "sentence_nr": 29 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.647817438132439, - "sentence_nr": 29 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.4125519163596689, - "sentence_nr": 29 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5539867049403877, - "sentence_nr": 29 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.4125519163596689, - "sentence_nr": 29 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5539867049403877, - "sentence_nr": 29 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.34641959937802264, - "sentence_nr": 29 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.47549559716182727, - "sentence_nr": 29 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 
0.4125519163596689, - "sentence_nr": 29 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5539867049403877, - "sentence_nr": 29 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.4125519163596689, - "sentence_nr": 29 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5539867049403877, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.42461633178803443, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.5603699277937889, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.42254876310519374, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.5561399558171133, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.2340216139262901, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.4522093023662336, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.4132352454218328, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.5544725906870476, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", 
- "metric": "bleu", - "score": 0.42254876310519374, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.5561399558171133, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.3951500216160541, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.6089660957340174, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.42254876310519374, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.5561399558171133, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.42282359171428024, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.5395092365663595, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.35412968165085734, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.4985795126785612, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.1598921499894403, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.390187618292215, - "sentence_nr": 29 - }, - { - "model": 
"meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.2400540439585043, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.49297433772099697, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.4806604068305994, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.664228268001068, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.42254876310519374, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.5561399558171133, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.2340216139262901, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.45184273575809186, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.7778111223054219, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.8190064480412373, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.16533113836624475, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 
0.4074791764578974, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.28547397706062927, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.4838477808123968, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.6053011982655683, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.652613765735072, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.4229247984636106, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.556465536088555, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.42254876310519374, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.5561399558171133, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.3471790743028735, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.4458106286047354, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.3555508425572384, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - 
"task": "translation", - "metric": "chrf", - "score": 0.5387745992013905, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.1709686260975486, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.3940091304204109, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.25958657290343434, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.43162699627918094, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.2213908395073965, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.4213527844474163, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.39696685122270786, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.5497060467823045, - "sentence_nr": 29 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - 
"bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": 
"mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - 
"sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - 
"bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - 
"sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "classification", - "metric": 
"accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": 
"classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, 
- { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": 
"classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": 
"classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - }, - { - "model": 
"meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": 
"classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - 
"model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - 
"model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, 
- "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - 
"bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - 
"sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 
1, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", 
- "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": 
"meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "classification", - "metric": 
"accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "classification", - 
"metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": 
"ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 1, - 
"sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": 
"classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": 
"classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": 
"meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": 
"classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": 
"google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 7 - }, - { - "model": 
"qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - 
"score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 7 - }, - { - "model": 
"meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - 
"score": 1, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, 
- "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": 
"id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - 
"sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", - "metric": 
"accuracy", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", 
- "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - }, - { - "model": 
"meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": 
"classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - }, - { - "model": 
"microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - 
"sentence_nr": 10 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - 
"bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": 
"accuracy", - "score": 0, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 11 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 11 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 11 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 11 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 11 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 11 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 11 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - 
"task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 11 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 11 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 11 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 11 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 11 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 11 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 11 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 11 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 11 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 11 - }, - { - "model": "amazon/nova-micro-v1", - 
"bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", 
- "score": 0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 11 - }, - { - "model": 
"meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 11 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 12 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 12 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - 
"sentence_nr": 12 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 12 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 12 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 12 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 12 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - 
"score": 1, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - }, - { - "model": 
"meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": 
"classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - }, - { - "model": "amazon/nova-micro-v1", - 
"bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 13 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 13 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - }, - { - "model": 
"meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": 
"classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - 
"sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 14 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 14 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", - "metric": 
"accuracy", - "score": 0, - "sentence_nr": 14 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 14 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 14 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": 
"classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 0, - 
"sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - }, - { - "model": 
"meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 15 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 15 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 15 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 15 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 15 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 15 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 15 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 15 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 
15 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 15 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 15 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 15 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 15 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 15 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 15 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 15 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 15 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - 
"score": 0, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 15 - }, - { - "model": 
"meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": 
"classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 15 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 16 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 16 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - 
"task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 16 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 16 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - }, - { - "model": 
"meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": 
"classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 1, - 
"sentence_nr": 16 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 17 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - 
"score": 1, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 17 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": 
"classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 1, - 
"sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - }, - { - "model": "openai/gpt-4o-mini", - 
"bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 18 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - }, - { - "model": 
"meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 18 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 18 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - 
"sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - }, - { - "model": 
"meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 18 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": 
"classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 19 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 19 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 19 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 19 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 19 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 19 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 19 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 19 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 19 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - 
"bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 19 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 19 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 19 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 19 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 19 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 19 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 19 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 19 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 19 - }, - { - "model": 
"meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": 
"classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 19 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - 
}, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 20 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 20 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, 
- "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 20 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 20 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", 
- "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 20 - }, - { 
- "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": 
"classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 21 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 21 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - 
"bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 21 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 21 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 21 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 - }, - { - "model": 
"meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": 
"classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 22 - 
}, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 22 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 22 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 22 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 22 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 22 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 22 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 22 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 22 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 22 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, 
- "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 22 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 22 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 22 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 22 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 22 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 22 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 22 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 22 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", 
- "metric": "accuracy", - "score": 1, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 22 - }, - { 
- "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 22 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": 
"classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 23 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - 
"bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 23 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - }, - { - "model": 
"meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": 
"classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - 
}, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 24 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - 
"sentence_nr": 24 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 24 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", 
- "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - }, - { 
- "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": 
"classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 25 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 25 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 25 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 25 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 25 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 25 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 25 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 25 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 25 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 25 - }, - { - "model": 
"mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 25 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 25 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 25 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 25 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 25 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 25 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 25 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, 
- "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 25 - }, - { - "model": 
"meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 25 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": 
"classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 26 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 26 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - }, - { - "model": 
"mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 26 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 26 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, 
- "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - }, - { - "model": 
"meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": 
"classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 27 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 27 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - }, - { - "model": 
"mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 27 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 27 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 27 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 27 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, 
- "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - }, - { - "model": 
"meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": 
"classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 28 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - }, - { - "model": 
"mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 28 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, 
- "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - }, - { - "model": 
"meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": 
"classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 29 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 29 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 29 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 29 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 29 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 29 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 29 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 29 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 29 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 29 - }, - { - "model": 
"mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 29 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 29 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 29 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 29 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 29 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 29 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 29 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, 
- "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 29 - }, - { - "model": 
"meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 29 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9411583614202783, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9389202454786235, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": 
"en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8775848642818888, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8618703443763697, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7861888156926622, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7987489460131649, - 
"sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9319748402595084, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7613425680699503, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8493237569441244, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9020031517329425, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8493237569441244, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.883570112979728, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8263460336753243, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8060322164809728, - 
"sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8980680846396624, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9491059403137463, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9664300701360793, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9457224261353452, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9708225134054753, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 
0.9419324607589119, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9619002332717353, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9189927159116271, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.895905738615658, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8719916488298841, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9397108105925289, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.884345665982421, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9584454525436005, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": 
"language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9643081480127652, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9067144042813564, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8781616442886918, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9745733081082687, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9237743711831492, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9659571253320222, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9044755244774213, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": 
"chrf", - "score": 0.9016506657203592, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9259203238585231, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9226314544302758, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.6237003645369218, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.919365977563579, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9113270242697518, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.898943894327586, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 
0.9736119227904283, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9415432301630186, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.973004167300919, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9617726716367615, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8788632576179716, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9442690941930104, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9167527970009353, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9264966822048945, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9760432643638268, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9290639912797567, - "sentence_nr": 1 - }, - { - "model": 
"meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9451284616565533, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9571970948049097, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9428452278208271, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.924510998540744, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9354255661287414, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9038448099971822, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9290214610132344, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9359307328554756, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9462257677914746, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - 
"metric": "chrf", - "score": 0.9685511109758306, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9466350739636148, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7785501063601203, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8677672451180615, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9519685270619841, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.5524309559543085, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - 
"sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8979970994003059, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8979970994003059, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9598023304313453, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8678877090803476, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.3628854370408249, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8866932684030095, - "sentence_nr": 2 - }, - { - "model": 
"amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7932574787392968, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8840632918991035, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9244224424282228, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7493760739956499, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9434070582654602, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8626111481890223, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9742381587466754, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - 
"sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9614829239512629, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9634058264556766, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 0.846746937646691, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9416090102549223, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9586487245465463, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8628736669093499, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": 
"chrf", - "score": 1.0, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8883148663773122, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 0.921000444185013, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.964284245003951, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.899852954654377, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.5884852453065169, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8943359440390058, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - 
"score": 1.0, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.6239646156236577, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8782485779028959, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9219735185328113, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8379214027434272, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9171135147465285, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8793006100154936, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.6764135013792538, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8320911917964368, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8320911917964368, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - 
"score": 1.0, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9020259333664543, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8443316591536836, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9062739514559724, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9243814194896306, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9257122714800141, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - 
"sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9048929676970495, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9233238051356927, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8961117810241208, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9137011072166213, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9144918070375806, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9447475462972004, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9418568225974095, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", 
- "metric": "chrf", - "score": 0.8631885674989124, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9540570534869818, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9356691952085903, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8263666332486633, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9187937618702817, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.6492261286778312, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - 
"metric": "chrf", - "score": 1.0, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.4782990117524071, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8728890059382535, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7924841060781368, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 - }, - { - "model": 
"google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8728890059382535, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8085699807438939, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9309167160514913, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8335210974928002, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9407617520385465, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9009704508776215, - "sentence_nr": 4 - }, - { - "model": 
"meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.886161550229872, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8864780713525466, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8619950335517561, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 0.877644990158928, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9473578431592224, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 - }, - { - 
"model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8989284887461744, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8982857165205713, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9421743042333945, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.909430339396572, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9114715597392106, - "sentence_nr": 5 - }, - { - 
"model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9221676855227006, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.903310364652346, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.43631872104818037, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.725100223395414, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8342041754812477, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7541096773855238, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9352893606252747, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-nemo", 
- "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7137044016250488, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8459329201101423, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9155785169978052, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.454243405917021, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.4367071875067552, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9053865214400596, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9344907300105301, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 0.941467473244312, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", 
- "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8759462570863868, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9116059567890715, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 0.95453015576562, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9271804273091313, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9494380676747487, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8907525765155897, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9420326057327402, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 - }, - { - "model": 
"meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8729192735278123, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 0.840210783941434, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8830406923187026, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8705872791986208, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9129896861855028, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9775140091004713, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.931908394385036, - 
"sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.958499216692883, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9787648208394673, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8848447424869419, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9476480635849643, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8420296194650692, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9645398026978572, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.976975965491712, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9575751193892209, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.917870378110458, - 
"sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.917870378110458, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.922108923148009, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9367021384173281, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.917870378110458, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9136709169732016, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9031487241080103, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.922108923148009, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9717329164232313, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9810420842974353, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 
0.9296061535584738, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9548717794727779, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9723617284409432, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9433216405879152, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9253992588631311, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7833761650543694, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8958698547783525, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9659983030155975, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9368374793769542, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": 
"mr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9292848975349729, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9705333075369675, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9560908971572966, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9288860917142431, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9402643484548583, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9303023646781129, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9076656012518489, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9272618174968876, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9630829363546703, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9437691960187881, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9233897890679653, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - 
"score": 0.9217593594034571, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9429459010031568, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9680340601535599, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9444947592571505, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9524237679532525, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8521740000505951, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9147273981117778, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9353915284262971, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9575256886848735, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9544425909905248, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - 
"metric": "chrf", - "score": 1.0, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.887089742205764, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8937272463225717, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9221577416896909, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7360571605491374, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9424882191492142, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9158962896380519, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9016185053131788, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9519313199322048, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9002497361613263, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - 
"metric": "chrf", - "score": 0.9424882191492142, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9404564646985731, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9525612663771642, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9416090102549223, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.946182450185975, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8827665860178672, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9242269657430007, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9209375409360453, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9453162319718537, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9354735336178899, - "sentence_nr": 7 - }, - { - "model": 
"meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9650606723493668, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 0.937172702008466, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9037456319061896, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9527540439558733, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9777992945719618, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9483614149601093, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9630476322301069, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9090634311284931, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9592439701684463, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9352813563171796, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9578898822826803, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - 
"task": "language_modeling", - "metric": "chrf", - "score": 0.9349087092124988, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9553475775967099, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9426144990998162, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9455357310467346, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9950087915805451, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9950087915805451, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9359599516797827, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9950087915805451, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8529883661830301, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9313047211019367, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9311406569876187, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9392038901097501, - 
"sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9504743930445531, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9950087915805451, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9283998656503502, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9934034758807603, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9131528589305679, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9917679206284817, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9566767123929576, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9359924521743563, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8893588081911743, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - 
"sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9583698738001583, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9934034758807603, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.946392812169666, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 0.911875333930421, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9169315433407361, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9541325707307038, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9648123726963476, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8370298547932784, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9334875203861144, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": 
"chrf", - "score": 0.9413496332501932, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9956823103485622, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9457390517164731, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9671298665063969, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9336521523423332, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9502062892893858, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9333019767772176, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9037394051488277, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9283644587512466, - "sentence_nr": 8 - }, - { - "model": 
"meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9237582925385585, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8995566191566017, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 0.896344147038989, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 0.09821094254330615, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9548273305811203, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9251737690567995, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9275689564213165, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9272442008199501, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9520060001290835, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": 
"chrf", - "score": 0.9058859200742604, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8789724147701462, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9137645544850267, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8969027357279203, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9520060001290835, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9275374047069039, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8772309014828462, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9140052999897977, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.868350408637765, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": 
"chrf", - "score": 1.0, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7563541659131354, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8578315979157695, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8441075622700097, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.23829288001976573, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9407267756704489, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.831845583109951, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9530684796567226, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8984174935165463, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.946008414943598, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - 
"metric": "chrf", - "score": 0.9285885624039975, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9645189965938258, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9601667560566091, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9113133701465544, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9363094557613988, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9499594621802195, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8850558582872771, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9413520522974334, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8953760832780698, - "sentence_nr": 9 - }, - { - "model": 
"meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9516191368774216, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 0.873135905690596, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9331628274049639, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9350921637704382, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9398175409358328, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9675093986501344, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 - }, - { 
- "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9282207391671503, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.957452925924953, - "sentence_nr": 10 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8937237551170429, - "sentence_nr": 10 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 10 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9256331955884847, - "sentence_nr": 10 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.904390835311888, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8995954000535624, - "sentence_nr": 10 - }, - { - "model": 
"meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.928962868887516, - "sentence_nr": 10 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9339798045072082, - "sentence_nr": 10 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8008809042180175, - "sentence_nr": 10 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9240001424211951, - "sentence_nr": 10 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.3493344613894351, - "sentence_nr": 10 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 0.92829327413418, - "sentence_nr": 10 - }, - { - "model": 
"meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9359271530286619, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9641555435524619, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 0.90719289051837, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8543701176038877, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9045960456690756, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9576659929734302, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9445842802137389, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 0.917893569547509, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": 
"chrf", - "score": 0.9031282594956593, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9325823323160847, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9171277146973622, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9631220314707449, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9125575210703364, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9671298665063969, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8583796678495444, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 0.917870378110458, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9075511178990168, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8942877287874674, - "sentence_nr": 10 - }, - { - "model": 
"meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8881782096383685, - "sentence_nr": 11 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8452994228892592, - "sentence_nr": 11 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 11 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 11 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 11 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 11 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 11 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.857664755026069, - "sentence_nr": 11 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": 
"language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 11 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7687402404428638, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9425182378610694, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8221659843346086, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8685375697135141, - "sentence_nr": 11 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 11 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7860944644568774, - "sentence_nr": 11 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 11 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7829829019188287, - "sentence_nr": 11 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9425182378610694, - "sentence_nr": 11 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.289269703803095, - "sentence_nr": 11 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7829829019188287, - "sentence_nr": 11 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - 
"task": "language_modeling", - "metric": "chrf", - "score": 0.9425182378610694, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 0.915813486906383, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.928671169616198, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9195852720074569, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9260563505342738, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8580715674095071, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8991782906832555, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 11 - }, - { - "model": 
"meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9549429726485847, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8571447284090962, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 0.953599772014362, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9382091007325469, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9125682774652475, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9084959093441131, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - 
"sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9389584881035126, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8968120926569282, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8757339860702672, - "sentence_nr": 11 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9742989957563788, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9854564066904739, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.938338375356983, - "sentence_nr": 12 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9363458435045497, - "sentence_nr": 12 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9275189832478317, - "sentence_nr": 12 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9680610688075657, - "sentence_nr": 12 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9458276502828801, - "sentence_nr": 12 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": 
"language_modeling", - "metric": "chrf", - "score": 0.9555270393882619, - "sentence_nr": 12 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.774972667720128, - "sentence_nr": 12 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9238483556315539, - "sentence_nr": 12 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 12 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9292605756517186, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8710905917506855, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8877998658561537, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9408832971568818, - "sentence_nr": 12 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8758560882945217, - "sentence_nr": 12 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9047504210526172, - "sentence_nr": 12 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9149458726191051, - "sentence_nr": 12 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9497380252636716, - "sentence_nr": 12 - }, - { - "model": 
"google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9247145535687903, - "sentence_nr": 12 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 12 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8829314518141973, - "sentence_nr": 12 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9497380252636716, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9369900232316837, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9584772514045287, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9656526051593539, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9262800142753679, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9178799098053634, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8988056403515298, - 
"sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9240902217687106, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9454713149117651, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9457650793019858, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9115531547253959, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9403725471773088, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9107758326980321, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9251111872988325, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9267004903727016, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9652440580136615, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.924254800539438, - "sentence_nr": 12 - }, - { - "model": 
"meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9054967244578502, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 0.886673201587762, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9082204179924286, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9665046359304257, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9492870842156111, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9495327576081029, - "sentence_nr": 12 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9605742681789634, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9410712595774171, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.971921146040729, - "sentence_nr": 13 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - 
"metric": "chrf", - "score": 0.8360964435901039, - "sentence_nr": 13 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9278436686065653, - "sentence_nr": 13 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 13 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9540941235545723, - "sentence_nr": 13 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7765803419515074, - "sentence_nr": 13 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9368660209060221, - "sentence_nr": 13 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9179315685239186, - "sentence_nr": 13 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9198867501155861, - "sentence_nr": 13 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9357668560693397, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.880651835588671, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 13 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": 
"language_modeling", - "metric": "chrf", - "score": 0.9322025130978147, - "sentence_nr": 13 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8666701669384438, - "sentence_nr": 13 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9301584319196643, - "sentence_nr": 13 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 13 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9201441893603447, - "sentence_nr": 13 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.4518476286184633, - "sentence_nr": 13 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8817151383770689, - "sentence_nr": 13 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9719892276800867, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9232252378020026, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.90340499273861, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": 
"fr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9445601279006905, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9284637794790105, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9506720475284802, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9650672132857259, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 0.935825271074837, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9417006532894496, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9180957642017807, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9336273124319283, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9199623581249377, - 
"sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9420383150390214, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9169222881606529, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9358954768171188, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9210475526688618, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 0.900422383617428, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9665042848270522, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9195975724156285, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9482591669689567, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 0.894400898846725, - "sentence_nr": 13 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - 
"task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9683895601588671, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 14 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 14 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.974733551222386, - "sentence_nr": 14 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 14 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 14 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 14 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 14 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.935724475087967, - "sentence_nr": 14 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.945278116491169, - "sentence_nr": 14 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - 
"sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.913976993531483, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 14 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 14 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9206503738833902, - "sentence_nr": 14 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8879551150411227, - "sentence_nr": 14 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9093507960484853, - "sentence_nr": 14 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 14 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.44325871778061554, - "sentence_nr": 14 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8629899790604912, - "sentence_nr": 14 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8839868610728687, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9659019608247615, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - 
"score": 1.0, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9612040783142544, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9355702448711621, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8575724679460186, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 0.919154316989783, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9107041155041439, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8860042875765471, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9163443895096822, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - 
"task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9513360683724416, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9506442510575418, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9210869399305139, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8602965545640948, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8912610518101419, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 0.857937519719319, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9528771181894694, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9241995664234885, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9432104991415542, - 
"sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8927784164557715, - "sentence_nr": 14 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8681309346882299, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9045257596276787, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 15 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7552111299277484, - "sentence_nr": 15 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.82396628763246, - "sentence_nr": 15 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 15 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 15 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 15 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 15 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 15 - }, - { - "model": 
"amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 15 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8809116426093319, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9069369532463243, - "sentence_nr": 15 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 15 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8255413975339149, - "sentence_nr": 15 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9116712045344968, - "sentence_nr": 15 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8855094439275503, - "sentence_nr": 15 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8809116426093319, - "sentence_nr": 15 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.5085021700346579, - "sentence_nr": 15 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8155954216287978, - "sentence_nr": 15 - }, - { - "model": 
"amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8849766832597384, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9321985099431636, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9158869153954171, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8940299169999223, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9029209331114941, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9434784706316768, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9504499063681887, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8103402263404181, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9033542015144801, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - 
"metric": "chrf", - "score": 1.0, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8920851535963175, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9012698346023688, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8815241253287673, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 0.955434974676454, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9190034267575142, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9028341607528202, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7933760889502307, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - 
"bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9669111778196173, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9183552099282611, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9213964969470535, - "sentence_nr": 15 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9438561056375272, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9245427558640842, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9466217999433078, - "sentence_nr": 16 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 16 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8600910973378976, - "sentence_nr": 16 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 16 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 16 - }, - { - "model": 
"google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 16 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.5465479162881712, - "sentence_nr": 16 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.908088143295894, - "sentence_nr": 16 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8689979953554426, - "sentence_nr": 16 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8837997874830685, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 16 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9430526976186369, - "sentence_nr": 16 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7213258253735133, - "sentence_nr": 16 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8583796678495444, - "sentence_nr": 16 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 16 - }, - { - "model": 
"google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 16 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.5352913894873965, - "sentence_nr": 16 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7506613813658406, - "sentence_nr": 16 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9120029292560927, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 0.969258616291086, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9359933426460225, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8446197069920836, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9665537794677691, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7519024768911576, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9419599049218603, - 
"sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9100379761498075, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9243062555931161, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9232535952320629, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9430158926147498, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8923268998495886, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9316958873367511, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9441083273271286, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 0.899852954654377, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9451690574618664, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9470556595464068, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 16 - }, - { - "model": 
"meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8625414653847894, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8658510104009289, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 0.938651167013012, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9362303281043904, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9288883358178652, - "sentence_nr": 16 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 17 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7378741057437793, - 
"sentence_nr": 17 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.898904151376881, - "sentence_nr": 17 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 17 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 17 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 17 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 17 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8446522700991944, - "sentence_nr": 17 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9053865214400596, - "sentence_nr": 17 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8700885813654318, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 17 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9331139325257429, - "sentence_nr": 17 - }, - { - "model": 
"mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8860497305091617, - "sentence_nr": 17 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 17 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8222704990602537, - "sentence_nr": 17 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8402559609277754, - "sentence_nr": 17 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7386088026745246, - "sentence_nr": 17 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.06557474419143802, - "sentence_nr": 17 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8700885813654318, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8771568927591851, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8869070241487921, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 17 - }, - { - 
"model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8173012945645394, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8220012279932035, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8449397341788647, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9239069749524619, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8972504357155736, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 0.6602446784708298, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 
1.0, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8667833154965509, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7306831212016971, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7306831212016971, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7406377967705062, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8509760908759664, - "sentence_nr": 17 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.96926930549605, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - 
"metric": "chrf", - "score": 1.0, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 18 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8641726957145408, - "sentence_nr": 18 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 18 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9637804258017773, - "sentence_nr": 18 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 18 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 18 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9240863542577373, - "sentence_nr": 18 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9450374119495017, - "sentence_nr": 18 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.96926930549605, - "sentence_nr": 18 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9424882191492142, - 
"sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9424882191492142, - "sentence_nr": 18 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9392663489644577, - "sentence_nr": 18 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8389799674466019, - "sentence_nr": 18 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9253208187778743, - "sentence_nr": 18 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9221577416896909, - "sentence_nr": 18 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9221577416896909, - "sentence_nr": 18 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 18 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 18 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9144266092886102, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9500117624130617, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - 
"score": 0.905862662289465, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9003734503251455, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 0.858544407149412, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9281598514152588, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 0.948121913854874, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9629589146416885, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9580736862318411, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9708835294542548, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9234823141384267, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9939521304203686, - "sentence_nr": 18 - }, - { - "model": 
"meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9474838221026617, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9488355997601815, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9424390135303181, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9711070259637357, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9237920416869381, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8942780008373756, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8468261925085733, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8976119317111001, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9527352893094178, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9510981354135275, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9521144628004171, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": 
"gu", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 18 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9774592733638915, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9665042848270522, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9806060444395596, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 19 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9049668032095894, - "sentence_nr": 19 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9665042848270522, - "sentence_nr": 19 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 19 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 19 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9667317239059525, - "sentence_nr": 19 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 19 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9159800198090925, - "sentence_nr": 19 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": 
"language_modeling", - "metric": "chrf", - "score": 0.9667317239059525, - "sentence_nr": 19 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8925738398388144, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9058585844143391, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8888787903169728, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8925738398388144, - "sentence_nr": 19 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9128855680689195, - "sentence_nr": 19 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9272821491047395, - "sentence_nr": 19 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 19 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9413354408985303, - "sentence_nr": 19 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 19 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.46619006556188114, - "sentence_nr": 19 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.913896513382741, - "sentence_nr": 19 - }, - { - "model": "amazon/nova-micro-v1", - 
"bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9058585844143391, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9557922260754473, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9391656780027514, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9260113686541587, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9419307613884336, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9845996986850503, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9255228522887315, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.889174440461237, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9496761617043387, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9322360743819351, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - 
"score": 0.935492418630274, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9456325305487512, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9325466173278317, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9240800356922247, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9361690788124847, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.938043640398588, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.901373116210745, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9531605377803356, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9132591460407243, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 19 - }, - { - "model": 
"meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9494481589794223, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9415361564397403, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 0.897450557161678, - "sentence_nr": 19 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.937002127196651, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9435408381256087, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9421449698305296, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 20 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 20 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9607456319189528, - "sentence_nr": 20 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 20 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 
20 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 20 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.5467617051776391, - "sentence_nr": 20 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 20 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 20 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8969209805167669, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9060555921929084, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8969209805167669, - "sentence_nr": 20 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9096430262961498, - "sentence_nr": 20 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7983940190154283, - "sentence_nr": 20 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9132591460407243, - "sentence_nr": 20 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9204057102575467, - "sentence_nr": 20 - 
}, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 20 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.4968312722246179, - "sentence_nr": 20 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8001971757912975, - "sentence_nr": 20 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9204057102575467, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.95112146871187, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 0.949624286506194, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9588139991437585, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9236414681715879, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9453633691396565, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9278367059866518, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9302237306555959, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - 
"metric": "chrf", - "score": 0.8441460025255829, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9470556595464068, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 0.951863030034636, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8944443568631728, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9082204179924286, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.90717359411325, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9014597856352894, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9250084453288043, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 0.95462554022758, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9318340131711181, - "sentence_nr": 20 - }, - { - 
"model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9736147802901586, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9182449217144187, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9400180064454685, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9255769217104873, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9309426923102619, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9002012094811458, - "sentence_nr": 20 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9690017425712892, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 21 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - 
"score": 0.6924365679057801, - "sentence_nr": 21 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.804543317337012, - "sentence_nr": 21 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 21 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 21 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 21 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8385395593542468, - "sentence_nr": 21 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9515560914045473, - "sentence_nr": 21 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.854435717190483, - "sentence_nr": 21 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7570244995532351, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.6676892344393273, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.873135905690596, - "sentence_nr": 21 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 
0.6885773376269438, - "sentence_nr": 21 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.46961217063286037, - "sentence_nr": 21 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 21 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8379214027434272, - "sentence_nr": 21 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7570244995532351, - "sentence_nr": 21 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.20981645725460496, - "sentence_nr": 21 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.6659995521111991, - "sentence_nr": 21 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7570244995532351, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8388678282825207, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9180596829241628, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9166274634412449, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8626786769008709, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": 
"language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7991709881281639, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8119656541607598, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8872308158649556, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8914910756561332, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 0.927494511055529, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9528614248210486, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 21 - }, - { - "model": 
"meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8523282278495175, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9297633204435644, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9278042759794851, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8998995790099074, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9302677881301988, - "sentence_nr": 21 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9522511234396616, - "sentence_nr": 
22 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 22 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7585159184184324, - "sentence_nr": 22 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8922770448230282, - "sentence_nr": 22 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9126128133576369, - "sentence_nr": 22 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 22 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 22 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.6245412677586388, - "sentence_nr": 22 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.868233862673363, - "sentence_nr": 22 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8852329532489643, - "sentence_nr": 22 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8378994642516495, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8775848642818888, - "sentence_nr": 22 - }, 
- { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8493237569441244, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9448292727000915, - "sentence_nr": 22 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8555426729178464, - "sentence_nr": 22 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7558344174949267, - "sentence_nr": 22 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8497451239178159, - "sentence_nr": 22 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8493237569441244, - "sentence_nr": 22 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8199763712080639, - "sentence_nr": 22 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 22 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8206722459046871, - "sentence_nr": 22 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.883570112979728, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.914786293186172, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - 
"score": 1.0, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8845568645036501, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8937192042814042, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 0.901348698020278, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8382013802825361, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9219786709510569, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8767649499531999, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": 
"language_modeling", - "metric": "chrf", - "score": 0.9094880423990607, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8719390074611821, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9349020382990011, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9272997117562144, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8962185446474815, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8930034245249151, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9271664513693498, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8936606750264663, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8803360259381345, - "sentence_nr": 22 - 
}, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8680210960657176, - "sentence_nr": 22 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9699436870249787, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9699436870249787, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 23 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7931982206364059, - "sentence_nr": 23 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 23 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 23 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 23 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9699436870249787, - "sentence_nr": 23 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9438398456065387, - "sentence_nr": 23 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9281186022380125, - "sentence_nr": 23 - }, - { - 
"model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9699436870249787, - "sentence_nr": 23 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9184823166209557, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8839868610728687, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8884834862973964, - "sentence_nr": 23 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9300073119656489, - "sentence_nr": 23 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9381606131991436, - "sentence_nr": 23 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8839868610728687, - "sentence_nr": 23 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 23 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9300073119656489, - "sentence_nr": 23 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.4072337657555589, - "sentence_nr": 23 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 23 - 
}, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9184823166209557, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9293646790023864, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9511392272878579, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9293879632586071, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9277950353049101, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8843378183459343, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8741633139531418, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9271525909282003, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9736840552120738, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": 
"language_modeling", - "metric": "chrf", - "score": 0.9396084767892234, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9187563342696414, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8896752045577786, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9737097349915758, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9330058893011377, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9570066548501687, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9201684039669155, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9133901345922595, - "sentence_nr": 23 - }, - { - "model": 
"meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9458636432813123, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 0.917857433142856, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9375412439691305, - "sentence_nr": 23 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9556267474396976, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9489054429933926, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9489054429933926, - "sentence_nr": 24 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8327628422929998, - "sentence_nr": 24 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9249365863966041, - "sentence_nr": 24 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.922528755167094, - "sentence_nr": 24 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - 
"score": 0.9486938895906879, - "sentence_nr": 24 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 24 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 24 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8620685016584069, - "sentence_nr": 24 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9405916043682414, - "sentence_nr": 24 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9327915990783561, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.909738029095061, - "sentence_nr": 24 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.909738029095061, - "sentence_nr": 24 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8707492337114523, - "sentence_nr": 24 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.95883735444933, - "sentence_nr": 24 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 
0.9455007606735264, - "sentence_nr": 24 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9372630850025364, - "sentence_nr": 24 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.5983897920478856, - "sentence_nr": 24 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9299762198228243, - "sentence_nr": 24 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9302303599426779, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9544609413449265, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9355306533611718, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9432457481338326, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9472285181144658, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": 
"language_modeling", - "metric": "chrf", - "score": 0.923828763793418, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9224761498105726, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9756278595118478, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9499594621802195, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9544238060448419, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9012364553153411, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8199585012210312, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9280048312907723, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9587462450914201, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8938919301593574, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9507758066685948, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 
0.9432005035367906, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9675203656708941, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9303385434730891, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9401106918306472, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9533532275954528, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9274629860503822, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8926908826740254, - "sentence_nr": 24 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 25 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": 
"language_modeling", - "metric": "chrf", - "score": 0.6224897798032885, - "sentence_nr": 25 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7893575827661004, - "sentence_nr": 25 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 25 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 25 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 25 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 25 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 25 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9478696521177714, - "sentence_nr": 25 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7160421907140165, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.6217685026572488, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.794919886900137, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8793006100154936, - "sentence_nr": 25 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - 
"metric": "chrf", - "score": 0.5916523997385489, - "sentence_nr": 25 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.4849269488253923, - "sentence_nr": 25 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7462718113811923, - "sentence_nr": 25 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8083701726292805, - "sentence_nr": 25 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.722502153449955, - "sentence_nr": 25 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 25 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.5544920599877754, - "sentence_nr": 25 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.6853792233736985, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9200538056807258, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9630774769374594, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9143443086107108, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9052744049140443, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - 
"bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9018850910676268, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9142574363760879, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9168431011517528, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9141901633008906, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9317477810881586, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9354759108346813, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9141453314674155, - "sentence_nr": 25 - }, - { 
- "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9550191440621234, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8402328635525613, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 0.831845583109951, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9092382099397807, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9770044719642067, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9155318202784664, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 25 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - 
"sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8272309965382391, - "sentence_nr": 26 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7767725512278205, - "sentence_nr": 26 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9373981486656514, - "sentence_nr": 26 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9579023880929557, - "sentence_nr": 26 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9268329536813669, - "sentence_nr": 26 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.904428807825769, - "sentence_nr": 26 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 - }, - { - "model": 
"meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9116613044583819, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9084279839455062, - "sentence_nr": 26 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8013174743750245, - "sentence_nr": 26 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.34811585804131506, - "sentence_nr": 26 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8912610518101419, - "sentence_nr": 26 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9084279608664247, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 - }, - { - "model": 
"meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9342971539350323, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9618018909441389, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9221850850049388, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 - }, - { - "model": 
"meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9621502301102783, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9207497282487874, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8817316559043479, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9595521389704431, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9604273088099046, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8832167531630292, - 
"sentence_nr": 26 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9618018909441389, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9338423795983638, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 27 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 27 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 27 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 27 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 27 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 27 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 27 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8069582822584229, - "sentence_nr": 27 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9432051372011929, - "sentence_nr": 27 - }, - { - "model": "openai/gpt-4o-mini", - 
"bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8875472267363329, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8538919155402751, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8601111478550084, - "sentence_nr": 27 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8274840531521687, - "sentence_nr": 27 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8737243337458652, - "sentence_nr": 27 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8875472267363329, - "sentence_nr": 27 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 27 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8213297311895551, - "sentence_nr": 27 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.3007622907436899, - "sentence_nr": 27 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 27 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.906379768806771, - "sentence_nr": 27 - }, - { - "model": 
"meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8996352283472103, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8577239523880982, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9705288278234159, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9022302698191352, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9618116705103616, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9282902444420971, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9283062281157928, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9143841728614055, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9148205155364358, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": 
"language_modeling", - "metric": "chrf", - "score": 0.9429357495928096, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8199038085123204, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9007500710615358, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9335504867261654, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8519148326217993, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9467340802817513, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8914166352994622, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8697448206881571, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9091527400737927, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 27 - }, 
- { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9487286082082608, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9550331732946552, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 27 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9703747509928279, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9540941235545723, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9543144589160125, - "sentence_nr": 28 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.652649628941592, - "sentence_nr": 28 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9767775472269087, - "sentence_nr": 28 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 28 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9393628940364738, - "sentence_nr": 28 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 
0.9410712595774171, - "sentence_nr": 28 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.6444379795256558, - "sentence_nr": 28 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8964898605551818, - "sentence_nr": 28 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9263597385884417, - "sentence_nr": 28 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.899546929868499, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.899546929868499, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9256238040654331, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9256238040654331, - "sentence_nr": 28 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 28 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.824741266541094, - "sentence_nr": 28 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 28 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9256238040654331, - "sentence_nr": 28 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - 
"metric": "chrf", - "score": 0.8283905649271065, - "sentence_nr": 28 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 28 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.881413837458117, - "sentence_nr": 28 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.899546929868499, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9437940294094723, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9269703177791706, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 0.944904344834561, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8479413107328494, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9715595760527852, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8595969327963556, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9538713542813556, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8348508116391393, - "sentence_nr": 28 - }, - { - "model": 
"meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9572462820044535, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9375119517314923, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9493167367596885, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9344916654109876, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9849529115133767, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9275259780895282, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9276874028790393, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9473074618830379, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": 
"chrf", - "score": 0.9460494618521745, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8923268998495886, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9455007606735264, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9526558782357073, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9338345156544289, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8995764072227389, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9505226544098013, - "sentence_nr": 28 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9630841609539229, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9451142647196181, - "sentence_nr": 29 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7510122845400926, - "sentence_nr": 29 - }, - { - "model": 
"mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8090165300577936, - "sentence_nr": 29 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 29 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 29 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 29 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9543128468386116, - "sentence_nr": 29 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 29 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 29 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.920197561569537, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8759929746436435, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 29 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8935424392990651, - "sentence_nr": 29 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - 
"task": "language_modeling", - "metric": "chrf", - "score": 0.7769676399488106, - "sentence_nr": 29 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8875472267363329, - "sentence_nr": 29 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 29 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 29 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8664932988313133, - "sentence_nr": 29 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.906379768806771, - "sentence_nr": 29 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8001297194719582, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9048724843551281, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8947987168857687, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9489238765618674, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.945278116491169, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 29 - }, - { - "model": 
"meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8989194854163256, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9061728639858796, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9501419212325259, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 0.891206254843651, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9331628274049639, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9046319474149982, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 
0.9463095328863311, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9085828484030862, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8856061163721227, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9661878700572512, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 0.908669313428767, - "sentence_nr": 29 - } -] \ No newline at end of file +{ + "languages": [ + { + "bcp_47": "en", + "speakers": 1636485840, + "language_name": "English", + "autonym": "English", + "family": "Indo-European", + "flores_path": "eng_Latn", + "fleurs_tag": "en_us", + "commonvoice_hours": 2655.0, + "commonvoice_locale": "en", + "in_benchmark": true + }, + { + "bcp_47": "zh", + "speakers": 1304678914, + "language_name": "Chinese", + "autonym": "中文", + "family": 
"Sino-Tibetan", + "flores_path": "cmn_Hans", + "fleurs_tag": "cmn_hans_cn", + "commonvoice_hours": 422.0, + "commonvoice_locale": "zh-TW", + "in_benchmark": true + }, + { + "bcp_47": "hi", + "speakers": 546882144, + "language_name": "Hindi", + "autonym": "हिन्दी", + "family": "Indo-European", + "flores_path": "hin_Deva", + "fleurs_tag": "hi_in", + "commonvoice_hours": 16.0, + "commonvoice_locale": "hi-IN", + "in_benchmark": true + }, + { + "bcp_47": "es", + "speakers": 493528077, + "language_name": "Spanish", + "autonym": "Español", + "family": "Indo-European", + "flores_path": "spa_Latn", + "fleurs_tag": "es_419", + "commonvoice_hours": 446.0, + "commonvoice_locale": "es", + "in_benchmark": true + }, + { + "bcp_47": "ar", + "speakers": 351664197, + "language_name": "Arabic", + "autonym": "العربية", + "family": "Afro-Asiatic", + "flores_path": "arb_Arab", + "fleurs_tag": "ar_eg", + "commonvoice_hours": 92.0, + "commonvoice_locale": "ar", + "in_benchmark": true + }, + { + "bcp_47": "ur", + "speakers": 290790290, + "language_name": "Urdu", + "autonym": "اردو", + "family": "Indo-European", + "flores_path": "urd_Arab", + "fleurs_tag": "ur_pk", + "commonvoice_hours": 77.0, + "commonvoice_locale": "ur", + "in_benchmark": true + }, + { + "bcp_47": "fr", + "speakers": 278611507, + "language_name": "French", + "autonym": "Français", + "family": "Indo-European", + "flores_path": "fra_Latn", + "fleurs_tag": "fr_fr", + "commonvoice_hours": 1054.0, + "commonvoice_locale": "fr", + "in_benchmark": true + }, + { + "bcp_47": "bn", + "speakers": 267193288, + "language_name": "Bangla", + "autonym": "বাংলা", + "family": "Indo-European", + "flores_path": "ben_Beng", + "fleurs_tag": "bn_in", + "commonvoice_hours": 49.0, + "commonvoice_locale": "bn", + "in_benchmark": true + }, + { + "bcp_47": "pt", + "speakers": 237496885, + "language_name": "Portuguese", + "autonym": "Português", + "family": "Indo-European", + "flores_path": "por_Latn", + "fleurs_tag": "pt_br", + "commonvoice_hours": 
177.0, + "commonvoice_locale": "pt", + "in_benchmark": true + }, + { + "bcp_47": "pa", + "speakers": 203571210, + "language_name": "Punjabi", + "autonym": "ਪੰਜਾਬੀ", + "family": "Indo-European", + "flores_path": "pan_Guru", + "fleurs_tag": "pa_in", + "commonvoice_hours": 2.3, + "commonvoice_locale": "pa-IN", + "in_benchmark": true + }, + { + "bcp_47": "ru", + "speakers": 195841151, + "language_name": "Russian", + "autonym": "Русский", + "family": "Indo-European", + "flores_path": "rus_Cyrl", + "fleurs_tag": "ru_ru", + "commonvoice_hours": 243.0, + "commonvoice_locale": "ru", + "in_benchmark": true + }, + { + "bcp_47": "sw", + "speakers": 171610296, + "language_name": "Swahili", + "autonym": "Kiswahili", + "family": "Atlantic-Congo", + "flores_path": "swh_Latn", + "fleurs_tag": "sw_ke", + "commonvoice_hours": 411.0, + "commonvoice_locale": "sw", + "in_benchmark": true + }, + { + "bcp_47": "id", + "speakers": 171207687, + "language_name": "Indonesian", + "autonym": "Bahasa Indonesia", + "family": "Austronesian", + "flores_path": "ind_Latn", + "fleurs_tag": "id_id", + "commonvoice_hours": 33.0, + "commonvoice_locale": "id", + "in_benchmark": true + }, + { + "bcp_47": "de", + "speakers": 136350226, + "language_name": "German", + "autonym": "Deutsch", + "family": "Indo-European", + "flores_path": "deu_Latn", + "fleurs_tag": "de_de", + "commonvoice_hours": 1361.0, + "commonvoice_locale": "de", + "in_benchmark": true + }, + { + "bcp_47": "ja", + "speakers": 119729026, + "language_name": "Japanese", + "autonym": "日本語", + "family": "Japonic", + "flores_path": "jpn_Jpan", + "fleurs_tag": "ja_jp", + "commonvoice_hours": 223.0, + "commonvoice_locale": "ja", + "in_benchmark": true + }, + { + "bcp_47": "te", + "speakers": 95478480, + "language_name": "Telugu", + "autonym": "తెలుగు", + "family": "Dravidian", + "flores_path": "tel_Telu", + "fleurs_tag": "te_in", + "commonvoice_hours": 0.3, + "commonvoice_locale": "te", + "in_benchmark": true + }, + { + "bcp_47": "lah", + 
"speakers": 93433552, + "language_name": "Western Panjabi", + "autonym": "لہندا پنجابی", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "mr", + "speakers": 92826300, + "language_name": "Marathi", + "autonym": "मराठी", + "family": "Indo-European", + "flores_path": "mar_Deva", + "fleurs_tag": "mr_in", + "commonvoice_hours": 20.0, + "commonvoice_locale": "mr", + "in_benchmark": true + }, + { + "bcp_47": "jv", + "speakers": 91180665, + "language_name": "Javanese", + "autonym": "Jawa", + "family": "Austronesian", + "flores_path": "jav_Latn", + "fleurs_tag": "jv_id", + "commonvoice_hours": 0.0, + "commonvoice_locale": "jv", + "in_benchmark": true + }, + { + "bcp_47": "vi", + "speakers": 86222962, + "language_name": "Vietnamese", + "autonym": "Tiếng Việt", + "family": "Austroasiatic", + "flores_path": "vie_Latn", + "fleurs_tag": "vi_vn", + "commonvoice_hours": 6.0, + "commonvoice_locale": "vi", + "in_benchmark": true + }, + { + "bcp_47": "ta", + "speakers": 85616159, + "language_name": "Tamil", + "autonym": "தமிழ்", + "family": "Dravidian", + "flores_path": "tam_Taml", + "fleurs_tag": "ta_in", + "commonvoice_hours": 234.0, + "commonvoice_locale": "ta", + "in_benchmark": true + }, + { + "bcp_47": "fa", + "speakers": 84710459, + "language_name": "Persian", + "autonym": "فارسی", + "family": "Indo-European", + "flores_path": "pes_Arab", + "fleurs_tag": "fa_ir", + "commonvoice_hours": 370.0, + "commonvoice_locale": "fa", + "in_benchmark": true + }, + { + "bcp_47": "wuu", + "speakers": 83641200, + "language_name": "Wu Chinese", + "autonym": "Wu Chinese", + "family": "Sino-Tibetan", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "tr", + "speakers": 80360704, + "language_name": "Turkish", + "autonym": "Türkçe", + "family": "Turkic", + "flores_path": 
"tur_Latn", + "fleurs_tag": "tr_tr", + "commonvoice_hours": 128.0, + "commonvoice_locale": "tr", + "in_benchmark": true + }, + { + "bcp_47": "yue", + "speakers": 79654759, + "language_name": "Cantonese", + "autonym": "粵語", + "family": "Sino-Tibetan", + "flores_path": "yue_Hant", + "fleurs_tag": "yue_hant_hk", + "commonvoice_hours": 203.0, + "commonvoice_locale": "yue", + "in_benchmark": true + }, + { + "bcp_47": "ko", + "speakers": 78357046, + "language_name": "Korean", + "autonym": "한국어", + "family": "Koreanic", + "flores_path": "kor_Hang", + "fleurs_tag": "ko_kr", + "commonvoice_hours": 1.7, + "commonvoice_locale": "ko", + "in_benchmark": true + }, + { + "bcp_47": "it", + "speakers": 70247060, + "language_name": "Italian", + "autonym": "Italiano", + "family": "Indo-European", + "flores_path": "ita_Latn", + "fleurs_tag": "it_it", + "commonvoice_hours": 362.0, + "commonvoice_locale": "it", + "in_benchmark": true + }, + { + "bcp_47": "fil", + "speakers": 67471096, + "language_name": "Filipino", + "autonym": "Filipino", + "family": "Austronesian", + "flores_path": "fil_Latn", + "fleurs_tag": "fil_ph", + "commonvoice_hours": 0.0, + "commonvoice_locale": "tl", + "in_benchmark": true + }, + { + "bcp_47": "arz", + "speakers": 66639360, + "language_name": "Egyptian Arabic", + "autonym": "Egyptian Arabic", + "family": "Afro-Asiatic", + "flores_path": "arz_Arab", + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": true + }, + { + "bcp_47": "gu", + "speakers": 61721799, + "language_name": "Gujarati", + "autonym": "ગુજરાતી", + "family": "Indo-European", + "flores_path": "guj_Gujr", + "fleurs_tag": "gu_in", + "commonvoice_hours": 0.0, + "commonvoice_locale": "gu-IN", + "in_benchmark": true + }, + { + "bcp_47": "th", + "speakers": 55181920, + "language_name": "Thai", + "autonym": "ไทย", + "family": "Tai-Kadai", + "flores_path": "tha_Thai", + "fleurs_tag": "th_th", + "commonvoice_hours": 172.0, + "commonvoice_locale": "th", + 
"in_benchmark": true + }, + { + "bcp_47": "ps", + "speakers": 53542641, + "language_name": "Pashto", + "autonym": "پښتو", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": "ps_af", + "commonvoice_hours": 80.0, + "commonvoice_locale": "ps", + "in_benchmark": false + }, + { + "bcp_47": "kn", + "speakers": 49065330, + "language_name": "Kannada", + "autonym": "ಕನ್ನಡ", + "family": "Dravidian", + "flores_path": "kan_Knda", + "fleurs_tag": "kn_in", + "commonvoice_hours": 0.0, + "commonvoice_locale": "kn", + "in_benchmark": true + }, + { + "bcp_47": "pcm", + "speakers": 44945880, + "language_name": "Nigerian Pidgin", + "autonym": "Naijíriá Píjin", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 14.0, + "commonvoice_locale": "pcm", + "in_benchmark": false + }, + { + "bcp_47": "ml", + "speakers": 43257484, + "language_name": "Malayalam", + "autonym": "മലയാളം", + "family": "Dravidian", + "flores_path": "mal_Mlym", + "fleurs_tag": "ml_in", + "commonvoice_hours": 2.8, + "commonvoice_locale": "ml", + "in_benchmark": true + }, + { + "bcp_47": "or", + "speakers": 42434880, + "language_name": "Odia", + "autonym": "ଓଡ଼ିଆ", + "family": "Indo-European", + "flores_path": "ory_Orya", + "fleurs_tag": "or_in", + "commonvoice_hours": 2.8, + "commonvoice_locale": "or", + "in_benchmark": true + }, + { + "bcp_47": "pl", + "speakers": 41077399, + "language_name": "Polish", + "autonym": "Polski", + "family": "Indo-European", + "flores_path": "pol_Latn", + "fleurs_tag": "pl_pl", + "commonvoice_hours": 174.0, + "commonvoice_locale": "pl", + "in_benchmark": true + }, + { + "bcp_47": "hsn", + "speakers": 40426580, + "language_name": "Xiang Chinese", + "autonym": "Xiang Chinese", + "family": "Sino-Tibetan", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "ha", + "speakers": 40411882, + "language_name": "Hausa", + "autonym": "Hausa", + 
"family": "Afro-Asiatic", + "flores_path": "hau_Latn", + "fleurs_tag": "ha_ng", + "commonvoice_hours": 4.1, + "commonvoice_locale": "ha", + "in_benchmark": true + }, + { + "bcp_47": "sd", + "speakers": 40329510, + "language_name": "Sindhi", + "autonym": "سنڌي", + "family": "Indo-European", + "flores_path": "snd_Arab", + "fleurs_tag": "sd_in", + "commonvoice_hours": 0.4, + "commonvoice_locale": "sd", + "in_benchmark": true + }, + { + "bcp_47": "apc", + "speakers": 39031474, + "language_name": "North Levantine Arabic", + "autonym": "العامية", + "family": "Afro-Asiatic", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "ms", + "speakers": 38097307, + "language_name": "Malay", + "autonym": "Bahasa Malaysia", + "family": "Austronesian", + "flores_path": "zsm_Latn", + "fleurs_tag": "ms_my", + "commonvoice_hours": 0.0, + "commonvoice_locale": "ms", + "in_benchmark": true + }, + { + "bcp_47": "my", + "speakers": 36559231, + "language_name": "Burmese", + "autonym": "မြန်မာ", + "family": "Sino-Tibetan", + "flores_path": "mya_Mymr", + "fleurs_tag": "my_mm", + "commonvoice_hours": 0.0, + "commonvoice_locale": "my", + "in_benchmark": true + }, + { + "bcp_47": "am", + "speakers": 35728475, + "language_name": "Amharic", + "autonym": "አማርኛ", + "family": "Afro-Asiatic", + "flores_path": "amh_Ethi", + "fleurs_tag": "am_et", + "commonvoice_hours": 1.8, + "commonvoice_locale": "am", + "in_benchmark": true + }, + { + "bcp_47": "arq", + "speakers": 35667507, + "language_name": "Algerian Arabic", + "autonym": "Algerian Arabic", + "family": "Afro-Asiatic", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "om", + "speakers": 34897121, + "language_name": "Oromo", + "autonym": "Oromoo", + "family": "Afro-Asiatic", + "flores_path": "gaz_Latn", + "fleurs_tag": "om_et", + "commonvoice_hours": 0.0, 
+ "commonvoice_locale": "om", + "in_benchmark": true + }, + { + "bcp_47": "bho", + "speakers": 32934797, + "language_name": "Bhojpuri", + "autonym": "भोजपुरी", + "family": "Indo-European", + "flores_path": "bho_Deva", + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": true + }, + { + "bcp_47": "uz", + "speakers": 32792780, + "language_name": "Uzbek", + "autonym": "O‘Zbek", + "family": "Turkic", + "flores_path": "uzn_Latn", + "fleurs_tag": "uz_uz", + "commonvoice_hours": 100.0, + "commonvoice_locale": "uz", + "in_benchmark": true + }, + { + "bcp_47": "az", + "speakers": 32446682, + "language_name": "Azerbaijani", + "autonym": "Azərbaycan", + "family": "Turkic", + "flores_path": "azj_Latn", + "fleurs_tag": "az_az", + "commonvoice_hours": 0.5, + "commonvoice_locale": "az", + "in_benchmark": true + }, + { + "bcp_47": "hak", + "speakers": 32062460, + "language_name": "Hakka Chinese", + "autonym": "Hakka Chinese", + "family": "Sino-Tibetan", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "su", + "speakers": 32043120, + "language_name": "Sundanese", + "autonym": "Basa Sunda", + "family": "Austronesian", + "flores_path": "sun_Latn", + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": true + }, + { + "bcp_47": "nl", + "speakers": 31765645, + "language_name": "Dutch", + "autonym": "Nederlands", + "family": "Indo-European", + "flores_path": "nld_Latn", + "fleurs_tag": "nl_nl", + "commonvoice_hours": 115.0, + "commonvoice_locale": "nl", + "in_benchmark": true + }, + { + "bcp_47": "ary", + "speakers": 30938679, + "language_name": "Moroccan Arabic", + "autonym": "Moroccan Arabic", + "family": "Afro-Asiatic", + "flores_path": "ary_Arab", + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": true + }, + { + "bcp_47": "uk", + "speakers": 
29348975, + "language_name": "Ukrainian", + "autonym": "Українська", + "family": "Indo-European", + "flores_path": "ukr_Cyrl", + "fleurs_tag": "uk_ua", + "commonvoice_hours": 99.0, + "commonvoice_locale": "uk", + "in_benchmark": true + }, + { + "bcp_47": "yo", + "speakers": 28685568, + "language_name": "Yoruba", + "autonym": "Èdè Yorùbá", + "family": "Atlantic-Congo", + "flores_path": "yor_Latn", + "fleurs_tag": "yo_ng", + "commonvoice_hours": 6.0, + "commonvoice_locale": "yo", + "in_benchmark": true + }, + { + "bcp_47": "skr", + "speakers": 28020120, + "language_name": "Saraiki", + "autonym": "سرائیکی", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 4.3, + "commonvoice_locale": "skr", + "in_benchmark": false + }, + { + "bcp_47": "ig", + "speakers": 27823640, + "language_name": "Igbo", + "autonym": "Igbo", + "family": "Atlantic-Congo", + "flores_path": "ibo_Latn", + "fleurs_tag": "ig_ng", + "commonvoice_hours": 0.0, + "commonvoice_locale": "ig", + "in_benchmark": true + }, + { + "bcp_47": "nan", + "speakers": 26486380, + "language_name": "Min Nan Chinese", + "autonym": "Min Nan Chinese", + "family": "Sino-Tibetan", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "ceb", + "speakers": 26203440, + "language_name": "Cebuano", + "autonym": "Cebuano", + "family": "Austronesian", + "flores_path": "ceb_Latn", + "fleurs_tag": "ceb_ph", + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": true + }, + { + "bcp_47": "awa", + "speakers": 25862924, + "language_name": "Awadhi", + "autonym": "Awadhi", + "family": "Indo-European", + "flores_path": "awa_Deva", + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": true + }, + { + "bcp_47": "mg", + "speakers": 24260130, + "language_name": "Malagasy", + "autonym": "Malagasy", + "family": "Austronesian", + "flores_path": 
"plt_Latn", + "fleurs_tag": null, + "commonvoice_hours": 0.0, + "commonvoice_locale": "mg", + "in_benchmark": true + }, + { + "bcp_47": "gan", + "speakers": 23698340, + "language_name": "Gan Chinese", + "autonym": "Gan Chinese", + "family": "Sino-Tibetan", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "ro", + "speakers": 22187408, + "language_name": "Romanian", + "autonym": "Română", + "family": "Indo-European", + "flores_path": "ron_Latn", + "fleurs_tag": "ro_ro", + "commonvoice_hours": 21.0, + "commonvoice_locale": "ro", + "in_benchmark": true + }, + { + "bcp_47": "bar", + "speakers": 22043627, + "language_name": "Bavarian", + "autonym": "Bavarian", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "ne", + "speakers": 20903374, + "language_name": "Nepali", + "autonym": "नेपाली", + "family": "Indo-European", + "flores_path": "npi_Deva", + "fleurs_tag": "ne_np", + "commonvoice_hours": 1.3, + "commonvoice_locale": "ne-NP", + "in_benchmark": true + }, + { + "bcp_47": "mai", + "speakers": 19249149, + "language_name": "Maithili", + "autonym": "मैथिली", + "family": "Indo-European", + "flores_path": "mai_Deva", + "fleurs_tag": null, + "commonvoice_hours": 0.0, + "commonvoice_locale": "mai", + "in_benchmark": true + }, + { + "bcp_47": "as", + "speakers": 17239170, + "language_name": "Assamese", + "autonym": "অসমীয়া", + "family": "Indo-European", + "flores_path": "asm_Beng", + "fleurs_tag": "as_in", + "commonvoice_hours": 2.8, + "commonvoice_locale": "as", + "in_benchmark": true + }, + { + "bcp_47": "ny", + "speakers": 17026781, + "language_name": "Nyanja", + "autonym": "Nyanja", + "family": "Atlantic-Congo", + "flores_path": "nya_Latn", + "fleurs_tag": "ny_mw", + "commonvoice_hours": 0.0, + "commonvoice_locale": "ny", + "in_benchmark": true + 
}, + { + "bcp_47": "so", + "speakers": 16911645, + "language_name": "Somali", + "autonym": "Soomaali", + "family": "Afro-Asiatic", + "flores_path": "som_Latn", + "fleurs_tag": "so_so", + "commonvoice_hours": 0.0, + "commonvoice_locale": "so", + "in_benchmark": true + }, + { + "bcp_47": "mad", + "speakers": 16822638, + "language_name": "Madurese", + "autonym": "Madurese", + "family": "Austronesian", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "tts", + "speakers": 16554576, + "language_name": "Northeastern Thai", + "autonym": "Northeastern Thai", + "family": "Tai-Kadai", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "rkt", + "speakers": 16274502, + "language_name": "Rangpuri", + "autonym": "Rangpuri", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "mag", + "speakers": 15913080, + "language_name": "Magahi", + "autonym": "Magahi", + "family": "Indo-European", + "flores_path": "mag_Deva", + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": true + }, + { + "bcp_47": "bgc", + "speakers": 15913080, + "language_name": "Haryanvi", + "autonym": "हरियाणवी", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "mwr", + "speakers": 15913080, + "language_name": "Marwari", + "autonym": "Marwari", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "sr", + "speakers": 15602410, + "language_name": "Serbian", + "autonym": "Српски", + "family": 
"Indo-European", + "flores_path": "srp_Cyrl", + "fleurs_tag": "sr_rs", + "commonvoice_hours": 7.5, + "commonvoice_locale": "sr", + "in_benchmark": true + }, + { + "bcp_47": "si", + "speakers": 15564656, + "language_name": "Sinhala", + "autonym": "සිංහල", + "family": "Indo-European", + "flores_path": "sin_Sinh", + "fleurs_tag": null, + "commonvoice_hours": 0.0, + "commonvoice_locale": "si", + "in_benchmark": true + }, + { + "bcp_47": "km", + "speakers": 15065030, + "language_name": "Khmer", + "autonym": "ខ្មែរ", + "family": "Austroasiatic", + "flores_path": "khm_Khmr", + "fleurs_tag": "km_kh", + "commonvoice_hours": 0.0, + "commonvoice_locale": "km", + "in_benchmark": true + }, + { + "bcp_47": "hne", + "speakers": 14586990, + "language_name": "Chhattisgarhi", + "autonym": "Chhattisgarhi", + "family": "Indo-European", + "flores_path": "hne_Deva", + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": true + }, + { + "bcp_47": "fuv", + "speakers": 14339876, + "language_name": "Nigerian Fulfulde", + "autonym": "Nigerian Fulfulde", + "family": "Atlantic-Congo", + "flores_path": "fuv_Latn", + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": true + }, + { + "bcp_47": "zu", + "speakers": 13973830, + "language_name": "Zulu", + "autonym": "Isizulu", + "family": "Atlantic-Congo", + "flores_path": "zul_Latn", + "fleurs_tag": "zu_za", + "commonvoice_hours": 0.0, + "commonvoice_locale": "zu", + "in_benchmark": true + }, + { + "bcp_47": "kk", + "speakers": 13637392, + "language_name": "Kazakh", + "autonym": "Қазақ Тілі", + "family": "Turkic", + "flores_path": "kaz_Cyrl", + "fleurs_tag": "kk_kz", + "commonvoice_hours": 2.1, + "commonvoice_locale": "kk", + "in_benchmark": true + }, + { + "bcp_47": "dcc", + "speakers": 13128291, + "language_name": "Deccan", + "autonym": "Deccan", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + 
"commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "cs", + "speakers": 13045532, + "language_name": "Czech", + "autonym": "Čeština", + "family": "Indo-European", + "flores_path": "ces_Latn", + "fleurs_tag": "cs_cz", + "commonvoice_hours": 74.0, + "commonvoice_locale": "cs", + "in_benchmark": true + }, + { + "bcp_47": "sv", + "speakers": 12932871, + "language_name": "Swedish", + "autonym": "Svenska", + "family": "Indo-European", + "flores_path": "swe_Latn", + "fleurs_tag": "sv_se", + "commonvoice_hours": 47.0, + "commonvoice_locale": "sv-SE", + "in_benchmark": true + }, + { + "bcp_47": "hu", + "speakers": 12443430, + "language_name": "Hungarian", + "autonym": "Magyar", + "family": "Uralic", + "flores_path": "hun_Latn", + "fleurs_tag": "hu_hu", + "commonvoice_hours": 92.0, + "commonvoice_locale": "hu", + "in_benchmark": true + }, + { + "bcp_47": "el", + "speakers": 12292242, + "language_name": "Greek", + "autonym": "Ελληνικά", + "family": "Indo-European", + "flores_path": "ell_Grek", + "fleurs_tag": "el_gr", + "commonvoice_hours": 20.0, + "commonvoice_locale": "el", + "in_benchmark": true + }, + { + "bcp_47": "sn", + "speakers": 11782503, + "language_name": "Shona", + "autonym": "Chishona", + "family": "Atlantic-Congo", + "flores_path": "sna_Latn", + "fleurs_tag": "sn_zw", + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": true + }, + { + "bcp_47": "nds", + "speakers": 11520008, + "language_name": "Low German", + "autonym": "Neddersass’Sch", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "ak", + "speakers": 11442678, + "language_name": "Akan", + "autonym": "Akan", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 0.2, + "commonvoice_locale": "tw", + "in_benchmark": false + }, + { + "bcp_47": "qu", + "speakers": 11385851, + "language_name": 
"Quechua", + "autonym": "Runasimi", + "family": "Quechuan", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "ckb", + "speakers": 11086549, + "language_name": "Central Kurdish", + "autonym": "کوردیی ناوەندی", + "family": "Indo-European", + "flores_path": "ckb_Arab", + "fleurs_tag": "ckb_iq", + "commonvoice_hours": 135.0, + "commonvoice_locale": "ckb", + "in_benchmark": true + }, + { + "bcp_47": "rw", + "speakers": 11083625, + "language_name": "Kinyarwanda", + "autonym": "Kinyarwanda", + "family": "Atlantic-Congo", + "flores_path": "kin_Latn", + "fleurs_tag": null, + "commonvoice_hours": 2002.0, + "commonvoice_locale": "rw", + "in_benchmark": true + }, + { + "bcp_47": "wo", + "speakers": 11025494, + "language_name": "Wolof", + "autonym": "Wolof", + "family": "Atlantic-Congo", + "flores_path": "wol_Latn", + "fleurs_tag": "wo_sn", + "commonvoice_hours": 0.0, + "commonvoice_locale": "wo", + "in_benchmark": true + }, + { + "bcp_47": "aeb", + "speakers": 10549080, + "language_name": "Tunisian Arabic", + "autonym": "Tunisian Arabic", + "family": "Afro-Asiatic", + "flores_path": "aeb_Arab", + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": true + }, + { + "bcp_47": "ilo", + "speakers": 10481376, + "language_name": "Iloko", + "autonym": "Ilokano", + "family": "Austronesian", + "flores_path": "ilo_Latn", + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": true + }, + { + "bcp_47": "xh", + "speakers": 10182944, + "language_name": "Xhosa", + "autonym": "Isixhosa", + "family": "Atlantic-Congo", + "flores_path": "xho_Latn", + "fleurs_tag": "xh_za", + "commonvoice_hours": 0.0, + "commonvoice_locale": "xh", + "in_benchmark": true + }, + { + "bcp_47": "ti", + "speakers": 10145911, + "language_name": "Tigrinya", + "autonym": "ትግርኛ", + "family": "Afro-Asiatic", + "flores_path": "tir_Ethi", + 
"fleurs_tag": null, + "commonvoice_hours": 0.0, + "commonvoice_locale": "ti", + "in_benchmark": true + }, + { + "bcp_47": "be", + "speakers": 10064517, + "language_name": "Belarusian", + "autonym": "Беларуская", + "family": "Indo-European", + "flores_path": "bel_Cyrl", + "fleurs_tag": "be_by", + "commonvoice_hours": 1806.0, + "commonvoice_locale": "be", + "in_benchmark": true + }, + { + "bcp_47": "lua", + "speakers": 9770880, + "language_name": "Luba-Lulua", + "autonym": "Luba-Lulua", + "family": "Atlantic-Congo", + "flores_path": "lua_Latn", + "fleurs_tag": null, + "commonvoice_hours": 0.0, + "commonvoice_locale": "lua", + "in_benchmark": true + }, + { + "bcp_47": "tg", + "speakers": 9644223, + "language_name": "Tajik", + "autonym": "Тоҷикӣ", + "family": "Indo-European", + "flores_path": "tgk_Cyrl", + "fleurs_tag": "tg_tj", + "commonvoice_hours": 0.0, + "commonvoice_locale": "tg", + "in_benchmark": true + }, + { + "bcp_47": "umb", + "speakers": 9431467, + "language_name": "Umbundu", + "autonym": "Umbundu", + "family": "Atlantic-Congo", + "flores_path": "umb_Latn", + "fleurs_tag": "umb_ao", + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": true + }, + { + "bcp_47": "bm", + "speakers": 9385632, + "language_name": "Bambara", + "autonym": "Bamanakan", + "family": "Mande", + "flores_path": "bam_Latn", + "fleurs_tag": null, + "commonvoice_hours": 0.0, + "commonvoice_locale": "bm", + "in_benchmark": true + }, + { + "bcp_47": "af", + "speakers": 9318845, + "language_name": "Afrikaans", + "autonym": "Afrikaans", + "family": "Indo-European", + "flores_path": "afr_Latn", + "fleurs_tag": "af_za", + "commonvoice_hours": 0.5, + "commonvoice_locale": "af", + "in_benchmark": true + }, + { + "bcp_47": "hil", + "speakers": 9171204, + "language_name": "Hiligaynon", + "autonym": "Ilonggo", + "family": "Austronesian", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 0.0, + "commonvoice_locale": "hil", + "in_benchmark": false + }, + { + 
"bcp_47": "ki", + "speakers": 9099743, + "language_name": "Kikuyu", + "autonym": "Gikuyu", + "family": "Atlantic-Congo", + "flores_path": "kik_Latn", + "fleurs_tag": null, + "commonvoice_hours": 0.0, + "commonvoice_locale": "ki", + "in_benchmark": true + }, + { + "bcp_47": "ht", + "speakers": 8964918, + "language_name": "Haitian Creole", + "autonym": "Haitian Creole", + "family": "Indo-European", + "flores_path": "hat_Latn", + "fleurs_tag": null, + "commonvoice_hours": 0.0, + "commonvoice_locale": "ht", + "in_benchmark": true + }, + { + "bcp_47": "ca", + "speakers": 8679139, + "language_name": "Catalan", + "autonym": "Català", + "family": "Indo-European", + "flores_path": "cat_Latn", + "fleurs_tag": "ca_es", + "commonvoice_hours": 2847.0, + "commonvoice_locale": "ca", + "in_benchmark": true + }, + { + "bcp_47": "he", + "speakers": 8675480, + "language_name": "Hebrew", + "autonym": "עברית", + "family": "Afro-Asiatic", + "flores_path": "heb_Hebr", + "fleurs_tag": "he_il", + "commonvoice_hours": 1.1, + "commonvoice_locale": "he", + "in_benchmark": true + }, + { + "bcp_47": "ii", + "speakers": 8364120, + "language_name": "Sichuan Yi", + "autonym": "ꆈꌠꉙ", + "family": "Sino-Tibetan", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "mos", + "speakers": 8334160, + "language_name": "Mossi", + "autonym": "Mossi", + "family": "Atlantic-Congo", + "flores_path": "mos_Latn", + "fleurs_tag": null, + "commonvoice_hours": 0.0, + "commonvoice_locale": "mos", + "in_benchmark": true + }, + { + "bcp_47": "bal", + "speakers": 8227887, + "language_name": "Baluchi", + "autonym": "بلۆچی", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 0.0, + "commonvoice_locale": "bal", + "in_benchmark": false + }, + { + "bcp_47": "syl", + "speakers": 8132550, + "language_name": "Sylheti", + "autonym": "Sylheti", + "family": "Indo-European", + "flores_path": 
null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "kmb", + "speakers": 8130575, + "language_name": "Kimbundu", + "autonym": "Kimbundu", + "family": "Atlantic-Congo", + "flores_path": "kmb_Latn", + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": true + }, + { + "bcp_47": "ug", + "speakers": 8052967, + "language_name": "Uyghur", + "autonym": "ئۇيغۇرچە", + "family": "Turkic", + "flores_path": "uig_Arab", + "fleurs_tag": null, + "commonvoice_hours": 366.0, + "commonvoice_locale": "ug", + "in_benchmark": true + }, + { + "bcp_47": "min", + "speakers": 8010780, + "language_name": "Minangkabau", + "autonym": "Minangkabau", + "family": "Austronesian", + "flores_path": "min_Latn", + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": true + }, + { + "bcp_47": "gsw", + "speakers": 7956952, + "language_name": "Swiss German", + "autonym": "Schwiizertüütsch", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 0.0, + "commonvoice_locale": "gsw", + "in_benchmark": false + }, + { + "bcp_47": "bg", + "speakers": 7878315, + "language_name": "Bulgarian", + "autonym": "Български", + "family": "Indo-European", + "flores_path": "bul_Cyrl", + "fleurs_tag": "bg_bg", + "commonvoice_hours": 16.0, + "commonvoice_locale": "bg", + "in_benchmark": true + }, + { + "bcp_47": "zgh", + "speakers": 7823574, + "language_name": "Standard Moroccan Tamazight", + "autonym": "ⵜⴰⵎⴰⵣⵉⵖⵜ", + "family": "Afro-Asiatic", + "flores_path": "zgh_Tfng", + "fleurs_tag": null, + "commonvoice_hours": 1.3, + "commonvoice_locale": "zgh", + "in_benchmark": true + }, + { + "bcp_47": "ff", + "speakers": 7788904, + "language_name": "Fula", + "autonym": "Pulaar", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": "ff_sn", + "commonvoice_hours": 0.0, + "commonvoice_locale": "ff", + 
"in_benchmark": false + }, + { + "bcp_47": "bs", + "speakers": 7594468, + "language_name": "Bosnian", + "autonym": "Bosanski", + "family": "Indo-European", + "flores_path": "bos_Latn", + "fleurs_tag": "bs_ba", + "commonvoice_hours": 0.0, + "commonvoice_locale": "bs", + "in_benchmark": true + }, + { + "bcp_47": "rn", + "speakers": 7475454, + "language_name": "Rundi", + "autonym": "Ikirundi", + "family": "Atlantic-Congo", + "flores_path": "run_Latn", + "fleurs_tag": null, + "commonvoice_hours": 0.0, + "commonvoice_locale": "rn", + "in_benchmark": true + }, + { + "bcp_47": "bjj", + "speakers": 7426104, + "language_name": "Kanauji", + "autonym": "Kanauji", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "sat", + "speakers": 7293495, + "language_name": "Santali", + "autonym": "ᱥᱟᱱᱛᱟᱲᱤ", + "family": "Austroasiatic", + "flores_path": "sat_Olck", + "fleurs_tag": null, + "commonvoice_hours": 0.5, + "commonvoice_locale": "sat", + "in_benchmark": true + }, + { + "bcp_47": "da", + "speakers": 7072056, + "language_name": "Danish", + "autonym": "Dansk", + "family": "Indo-European", + "flores_path": "dan_Latn", + "fleurs_tag": "da_dk", + "commonvoice_hours": 13.0, + "commonvoice_locale": "da", + "in_benchmark": true + }, + { + "bcp_47": "tk", + "speakers": 6870838, + "language_name": "Turkmen", + "autonym": "Türkmen Dili", + "family": "Turkic", + "flores_path": "tuk_Latn", + "fleurs_tag": null, + "commonvoice_hours": 2.8, + "commonvoice_locale": "tk", + "in_benchmark": true + }, + { + "bcp_47": "ku", + "speakers": 6866757, + "language_name": "Kurdish", + "autonym": "Kurdî (Kurmancî)", + "family": "Indo-European", + "flores_path": "kmr_Latn", + "fleurs_tag": null, + "commonvoice_hours": 69.0, + "commonvoice_locale": "kmr", + "in_benchmark": true + }, + { + "bcp_47": "hr", + "speakers": 6813164, + "language_name": "Croatian", + "autonym": "Hrvatski", + 
"family": "Indo-European", + "flores_path": "hrv_Latn", + "fleurs_tag": "hr_hr", + "commonvoice_hours": 0.0, + "commonvoice_locale": "hr", + "in_benchmark": true + }, + { + "bcp_47": "sq", + "speakers": 6791906, + "language_name": "Albanian", + "autonym": "Shqip", + "family": "Indo-European", + "flores_path": "als_Latn", + "fleurs_tag": null, + "commonvoice_hours": 8.8, + "commonvoice_locale": "sq", + "in_benchmark": true + }, + { + "bcp_47": "sk", + "speakers": 6680269, + "language_name": "Slovak", + "autonym": "Slovenčina", + "family": "Indo-European", + "flores_path": "slk_Latn", + "fleurs_tag": "sk_sk", + "commonvoice_hours": 48.0, + "commonvoice_locale": "sk", + "in_benchmark": true + }, + { + "bcp_47": "dyu", + "speakers": 6667328, + "language_name": "Dyula", + "autonym": "Dyula", + "family": "Mande", + "flores_path": "dyu_Latn", + "fleurs_tag": null, + "commonvoice_hours": 0.4, + "commonvoice_locale": "dyu", + "in_benchmark": true + }, + { + "bcp_47": "nod", + "speakers": 6621830, + "language_name": "Northern Thai", + "autonym": "Northern Thai", + "family": "Tai-Kadai", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "mn", + "speakers": 6572846, + "language_name": "Mongolian", + "autonym": "Монгол", + "family": "Mongolic-Khitan", + "flores_path": "khk_Cyrl", + "fleurs_tag": "mn_mn", + "commonvoice_hours": 46.0, + "commonvoice_locale": "mn", + "in_benchmark": true + }, + { + "bcp_47": "st", + "speakers": 6390567, + "language_name": "Southern Sotho", + "autonym": "Sesotho", + "family": "Atlantic-Congo", + "flores_path": "sot_Latn", + "fleurs_tag": null, + "commonvoice_hours": 0.0, + "commonvoice_locale": "st", + "in_benchmark": true + }, + { + "bcp_47": "kri", + "speakers": 6293684, + "language_name": "Krio", + "autonym": "Krio", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, 
+ "in_benchmark": false + }, + { + "bcp_47": "shi", + "speakers": 6187736, + "language_name": "Tachelhit", + "autonym": "ⵜⴰⵛⵍⵃⵉⵜ", + "family": "Afro-Asiatic", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 0.0, + "commonvoice_locale": "shi", + "in_benchmark": false + }, + { + "bcp_47": "tn", + "speakers": 6113428, + "language_name": "Tswana", + "autonym": "Tswana", + "family": "Atlantic-Congo", + "flores_path": "tsn_Latn", + "fleurs_tag": null, + "commonvoice_hours": 4.2, + "commonvoice_locale": "tn", + "in_benchmark": true + }, + { + "bcp_47": "wtm", + "speakers": 6100014, + "language_name": "Mewati", + "autonym": "Mewati", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "luy", + "speakers": 5888069, + "language_name": "Luyia", + "autonym": "Luluhia", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 0.0, + "commonvoice_locale": "bxk", + "in_benchmark": false + }, + { + "bcp_47": "gn", + "speakers": 5827107, + "language_name": "Guarani", + "autonym": "Avañe’Ẽ", + "family": "Tupian", + "flores_path": "gug_Latn", + "fleurs_tag": null, + "commonvoice_hours": 3.7, + "commonvoice_locale": "gn", + "in_benchmark": true + }, + { + "bcp_47": "fi", + "speakers": 5736842, + "language_name": "Finnish", + "autonym": "Suomi", + "family": "Uralic", + "flores_path": "fin_Latn", + "fleurs_tag": "fi_fi", + "commonvoice_hours": 15.0, + "commonvoice_locale": "fi", + "in_benchmark": true + }, + { + "bcp_47": "lg", + "speakers": 5622890, + "language_name": "Ganda", + "autonym": "Luganda", + "family": "Atlantic-Congo", + "flores_path": "lug_Latn", + "fleurs_tag": "lg_ug", + "commonvoice_hours": 437.0, + "commonvoice_locale": "lg", + "in_benchmark": true + }, + { + "bcp_47": "bew", + "speakers": 5607546, + "language_name": "Betawi", + "autonym": "Betawi", + "family": "Austronesian", + 
"flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 0.0, + "commonvoice_locale": "bew", + "in_benchmark": false + }, + { + "bcp_47": "ks", + "speakers": 5598085, + "language_name": "Kashmiri", + "autonym": "کٲشُر", + "family": "Indo-European", + "flores_path": "kas_Arab", + "fleurs_tag": null, + "commonvoice_hours": 0.0, + "commonvoice_locale": "ks", + "in_benchmark": true + }, + { + "bcp_47": "sou", + "speakers": 5518192, + "language_name": "Southern Thai", + "autonym": "Southern Thai", + "family": "Tai-Kadai", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "nb", + "speakers": 5468932, + "language_name": "Norwegian Bokmål", + "autonym": "Norsk Bokmål", + "family": "Indo-European", + "flores_path": "nob_Latn", + "fleurs_tag": "nb_no", + "commonvoice_hours": 0.1, + "commonvoice_locale": "nb-NO", + "in_benchmark": true + }, + { + "bcp_47": "no", + "speakers": 5467440, + "language_name": "Norwegian", + "autonym": "Norsk", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "bem", + "speakers": 5402246, + "language_name": "Bemba", + "autonym": "Ichibemba", + "family": "Atlantic-Congo", + "flores_path": "bem_Latn", + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": true + }, + { + "bcp_47": "hy", + "speakers": 5317273, + "language_name": "Armenian", + "autonym": "Հայերեն", + "family": "Indo-European", + "flores_path": "hye_Armn", + "fleurs_tag": "hy_am", + "commonvoice_hours": 31.0, + "commonvoice_locale": "hy-AM", + "in_benchmark": true + }, + { + "bcp_47": "nso", + "speakers": 5307578, + "language_name": "Northern Sotho", + "autonym": "Northern Sotho", + "family": "Atlantic-Congo", + "flores_path": "nso_Latn", + "fleurs_tag": "nso_za", + "commonvoice_hours": 0.0, + 
"commonvoice_locale": "nso", + "in_benchmark": true + }, + { + "bcp_47": "luo", + "speakers": 5245734, + "language_name": "Luo (Kenya and Tanzania)", + "autonym": "Dholuo", + "family": "Nilotic", + "flores_path": "luo_Latn", + "fleurs_tag": "luo_ke", + "commonvoice_hours": 30.0, + "commonvoice_locale": "luo", + "in_benchmark": true + }, + { + "bcp_47": "tpi", + "speakers": 5154217, + "language_name": "Tok Pisin", + "autonym": "Tok Pisin", + "family": "Indo-European", + "flores_path": "tpi_Latn", + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": true + }, + { + "bcp_47": "lo", + "speakers": 5138706, + "language_name": "Lao", + "autonym": "ລາວ", + "family": "Tai-Kadai", + "flores_path": "lao_Laoo", + "fleurs_tag": "lo_la", + "commonvoice_hours": 0.2, + "commonvoice_locale": "lo", + "in_benchmark": true + }, + { + "bcp_47": "suk", + "speakers": 5094094, + "language_name": "Sukuma", + "autonym": "Sukuma", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "kok", + "speakers": 4906533, + "language_name": "Konkani", + "autonym": "कोंकणी", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 0.0, + "commonvoice_locale": "knn", + "in_benchmark": false + }, + { + "bcp_47": "ts", + "speakers": 4880932, + "language_name": "Tsonga", + "autonym": "Tsonga", + "family": "Atlantic-Congo", + "flores_path": "tso_Latn", + "fleurs_tag": null, + "commonvoice_hours": 0.0, + "commonvoice_locale": "ts", + "in_benchmark": true + }, + { + "bcp_47": "vmf", + "speakers": 4809582, + "language_name": "Main-Franconian", + "autonym": "Main-Franconian", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "ban", + "speakers": 4806468, + "language_name": 
"Balinese", + "autonym": "Balinese", + "family": "Austronesian", + "flores_path": "ban_Latn", + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": true + }, + { + "bcp_47": "ee", + "speakers": 4690857, + "language_name": "Ewe", + "autonym": "Eʋegbe", + "family": "Atlantic-Congo", + "flores_path": "ewe_Latn", + "fleurs_tag": null, + "commonvoice_hours": 0.0, + "commonvoice_locale": "ee", + "in_benchmark": true + }, + { + "bcp_47": "za", + "speakers": 4321462, + "language_name": "Zhuang", + "autonym": "Vahcuengh", + "family": "Tai-Kadai", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "bug", + "speakers": 4298211, + "language_name": "Buginese", + "autonym": "Buginese", + "family": "Austronesian", + "flores_path": "bug_Latn", + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": true + }, + { + "bcp_47": "mzn", + "speakers": 4246165, + "language_name": "Mazanderani", + "autonym": "مازرونی", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "gom", + "speakers": 4243488, + "language_name": "Goan Konkani", + "autonym": "Goan Konkani", + "family": "Indo-European", + "flores_path": "gom_Deva", + "fleurs_tag": null, + "commonvoice_hours": 0.0, + "commonvoice_locale": "gom", + "in_benchmark": true + }, + { + "bcp_47": "kam", + "speakers": 4068120, + "language_name": "Kamba", + "autonym": "Kikamba", + "family": "Atlantic-Congo", + "flores_path": "kam_Latn", + "fleurs_tag": "kam_ke", + "commonvoice_hours": 0.0, + "commonvoice_locale": "kam", + "in_benchmark": true + }, + { + "bcp_47": "kln", + "speakers": 4068120, + "language_name": "Kalenjin", + "autonym": "Kalenjin", + "family": "Nilotic", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 
43.0, + "commonvoice_locale": "kln", + "in_benchmark": false + }, + { + "bcp_47": "bjn", + "speakers": 4010288, + "language_name": "Banjar", + "autonym": "Banjar", + "family": "Austronesian", + "flores_path": "bjn_Latn", + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": true + }, + { + "bcp_47": "hno", + "speakers": 3969517, + "language_name": "Northern Hindko", + "autonym": "Northern Hindko", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 11.0, + "commonvoice_locale": "hno", + "in_benchmark": false + }, + { + "bcp_47": "vmw", + "speakers": 3912766, + "language_name": "Makhuwa", + "autonym": "Emakhuwa", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 0.0, + "commonvoice_locale": "vmw", + "in_benchmark": false + }, + { + "bcp_47": "glk", + "speakers": 3906472, + "language_name": "Gilaki", + "autonym": "Gilaki", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "lmo", + "speakers": 3901518, + "language_name": "Lombard", + "autonym": "Lombard", + "family": "Indo-European", + "flores_path": "lmo_Latn", + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": true + }, + { + "bcp_47": "dje", + "speakers": 3871308, + "language_name": "Zarma", + "autonym": "Zarmaciine", + "family": "Songhay", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "ndc", + "speakers": 3867046, + "language_name": "Ndau", + "autonym": "Ndau", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "sid", + "speakers": 3783955, + "language_name": "Sidamo", + 
"autonym": "Sidamo", + "family": "Afro-Asiatic", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "ace", + "speakers": 3738364, + "language_name": "Achinese", + "autonym": "Achinese", + "family": "Austronesian", + "flores_path": "ace_Latn", + "fleurs_tag": null, + "commonvoice_hours": 0.0, + "commonvoice_locale": "ace", + "in_benchmark": true + }, + { + "bcp_47": "swv", + "speakers": 3713052, + "language_name": "Shekhawati", + "autonym": "Shekhawati", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "rif", + "speakers": 3692411, + "language_name": "Riffian", + "autonym": "Tarifit", + "family": "Afro-Asiatic", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 0.0, + "commonvoice_locale": "rif", + "in_benchmark": false + }, + { + "bcp_47": "shn", + "speakers": 3687984, + "language_name": "Shan", + "autonym": "တႆး", + "family": "Tai-Kadai", + "flores_path": "shn_Mymr", + "fleurs_tag": null, + "commonvoice_hours": 0.0, + "commonvoice_locale": "shn", + "in_benchmark": true + }, + { + "bcp_47": "lmn", + "speakers": 3580443, + "language_name": "Lambadi", + "autonym": "Lambadi", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "gbm", + "speakers": 3580443, + "language_name": "Garhwali", + "autonym": "Garhwali", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "ka", + "speakers": 3543646, + "language_name": "Georgian", + "autonym": "ქართული", + "family": "Kartvelian", + "flores_path": "kat_Geor", + "fleurs_tag": "ka_ge", + "commonvoice_hours": 161.0, + "commonvoice_locale": 
"ka", + "in_benchmark": true + }, + { + "bcp_47": "gl", + "speakers": 3515530, + "language_name": "Galician", + "autonym": "Galego", + "family": "Indo-European", + "flores_path": "glg_Latn", + "fleurs_tag": "gl_es", + "commonvoice_hours": 111.0, + "commonvoice_locale": "gl", + "in_benchmark": true + }, + { + "bcp_47": "ln", + "speakers": 3514491, + "language_name": "Lingala", + "autonym": "Lingála", + "family": "Atlantic-Congo", + "flores_path": "lin_Latn", + "fleurs_tag": "ln_cd", + "commonvoice_hours": 0.0, + "commonvoice_locale": "ln", + "in_benchmark": true + }, + { + "bcp_47": "man", + "speakers": 3511762, + "language_name": "Mandingo", + "autonym": "Mandingo", + "family": "Mande", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "tzm", + "speakers": 3485047, + "language_name": "Central Atlas Tamazight", + "autonym": "Tamaziɣt N Laṭlaṣ", + "family": "Afro-Asiatic", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "mfa", + "speakers": 3448870, + "language_name": "Pattani Malay", + "autonym": "Pattani Malay", + "family": "Austronesian", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "tiv", + "speakers": 3424448, + "language_name": "Tiv", + "autonym": "Tiv", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "kab", + "speakers": 3351886, + "language_name": "Kabyle", + "autonym": "Taqbaylit", + "family": "Afro-Asiatic", + "flores_path": "kab_Latn", + "fleurs_tag": null, + "commonvoice_hours": 571.0, + "commonvoice_locale": "kab", + "in_benchmark": true + }, + { + "bcp_47": "ky", + "speakers": 3338267, + "language_name": "Kyrgyz", + "autonym": 
"Кыргызча", + "family": "Turkic", + "flores_path": "kir_Cyrl", + "fleurs_tag": "ky_kg", + "commonvoice_hours": 39.0, + "commonvoice_locale": "ky", + "in_benchmark": true + }, + { + "bcp_47": "bik", + "speakers": 3275430, + "language_name": "Bikol", + "autonym": "Bikol", + "family": "Austronesian", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "fon", + "speakers": 3216150, + "language_name": "Fon", + "autonym": "Fon", + "family": "Atlantic-Congo", + "flores_path": "fon_Latn", + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": true + }, + { + "bcp_47": "gon", + "speakers": 3182616, + "language_name": "Gondi", + "autonym": "Gondi", + "family": "Dravidian", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "war", + "speakers": 3166927, + "language_name": "Waray", + "autonym": "Waray", + "family": "Austronesian", + "flores_path": "war_Latn", + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": true + }, + { + "bcp_47": "sdh", + "speakers": 3142162, + "language_name": "Southern Kurdish", + "autonym": "کوردی خوارگ", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 0.0, + "commonvoice_locale": "sdh", + "in_benchmark": false + }, + { + "bcp_47": "brh", + "speakers": 3035513, + "language_name": "Brahui", + "autonym": "Brahui", + "family": "Dravidian", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "bci", + "speakers": 3022921, + "language_name": "Baoulé", + "autonym": "Baoulé", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 2.0, + "commonvoice_locale": "bci", + "in_benchmark": false + }, 
+ { + "bcp_47": "bo", + "speakers": 3006697, + "language_name": "Tibetan", + "autonym": "བོད་སྐད་", + "family": "Sino-Tibetan", + "flores_path": "bod_Tibt", + "fleurs_tag": null, + "commonvoice_hours": 0.0, + "commonvoice_locale": "bo", + "in_benchmark": true + }, + { + "bcp_47": "ibb", + "speakers": 2996392, + "language_name": "Ibibio", + "autonym": "Ibibio", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 2.6, + "commonvoice_locale": "ibb", + "in_benchmark": false + }, + { + "bcp_47": "efi", + "speakers": 2996392, + "language_name": "Efik", + "autonym": "Efik", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "sg", + "speakers": 2935521, + "language_name": "Sango", + "autonym": "Sängö", + "family": "Atlantic-Congo", + "flores_path": "sag_Latn", + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": true + }, + { + "bcp_47": "kfy", + "speakers": 2917398, + "language_name": "Kumaoni", + "autonym": "Kumaoni", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "ay", + "speakers": 2838620, + "language_name": "Aymara", + "autonym": "Aymara", + "family": "Aymaran", + "flores_path": "ayr_Latn", + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": true + }, + { + "bcp_47": "nyn", + "speakers": 2724939, + "language_name": "Nyankole", + "autonym": "Runyankore", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 0.0, + "commonvoice_locale": "nyn", + "in_benchmark": false + }, + { + "bcp_47": "jam", + "speakers": 2668142, + "language_name": "Jamaican Creole English", + "autonym": "Jamaican Creole English", + "family": "Indo-European", + 
"flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "doi", + "speakers": 2652180, + "language_name": "Dogri", + "autonym": "डोगरी", + "family": "Indo-European", + "flores_path": "dgo_Deva", + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": true + }, + { + "bcp_47": "guz", + "speakers": 2622867, + "language_name": "Gusii", + "autonym": "Ekegusii", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "sas", + "speakers": 2590152, + "language_name": "Sasak", + "autonym": "Sasak", + "family": "Austronesian", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "kru", + "speakers": 2519571, + "language_name": "Kurukh", + "autonym": "Kurukh", + "family": "Dravidian", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "pam", + "speakers": 2511163, + "language_name": "Pampanga", + "autonym": "Pampanga", + "family": "Austronesian", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "fbl", + "speakers": 2511163, + "language_name": "West Albay Bikol", + "autonym": "West Albay Bikol", + "family": "Austronesian", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "lt", + "speakers": 2488617, + "language_name": "Lithuanian", + "autonym": "Lietuvių", + "family": "Indo-European", + "flores_path": "lit_Latn", + "fleurs_tag": "lt_lt", + "commonvoice_hours": 25.0, + "commonvoice_locale": "lt", + "in_benchmark": true + }, + { + "bcp_47": 
"bej", + "speakers": 2460326, + "language_name": "Beja", + "autonym": "Beja", + "family": "Afro-Asiatic", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "bbc", + "speakers": 2456639, + "language_name": "Batak Toba", + "autonym": "Batak Toba", + "family": "Austronesian", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "sck", + "speakers": 2386962, + "language_name": "Sadri", + "autonym": "Sadri", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "wbq", + "speakers": 2386962, + "language_name": "Waddar", + "autonym": "Waddar", + "family": "Dravidian", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "lu", + "speakers": 2340940, + "language_name": "Luba-Katanga", + "autonym": "Tshiluba", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "cgg", + "speakers": 2335662, + "language_name": "Chiga", + "autonym": "Rukiga", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 0.0, + "commonvoice_locale": "cgg", + "in_benchmark": false + }, + { + "bcp_47": "xog", + "speakers": 2292409, + "language_name": "Soga", + "autonym": "Olusoga", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "ss", + "speakers": 2212379, + "language_name": "Swati", + "autonym": "Siswati", + "family": "Atlantic-Congo", + "flores_path": "ssw_Latn", + "fleurs_tag": null, + 
"commonvoice_hours": 0.0, + "commonvoice_locale": "ss", + "in_benchmark": true + }, + { + "bcp_47": "haz", + "speakers": 2161984, + "language_name": "Hazaragi", + "autonym": "Hazaragi", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "mer", + "speakers": 2141116, + "language_name": "Meru", + "autonym": "Kĩmĩrũ", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "xnr", + "speakers": 2121744, + "language_name": "Kangri", + "autonym": "कांगड़ी", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "aa", + "speakers": 2119663, + "language_name": "Afar", + "autonym": "Afar", + "family": "Afro-Asiatic", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 0.0, + "commonvoice_locale": "aa", + "in_benchmark": false + }, + { + "bcp_47": "teo", + "speakers": 2082973, + "language_name": "Teso", + "autonym": "Kiteso", + "family": "Nilotic", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "ngl", + "speakers": 2046678, + "language_name": "Lomwe", + "autonym": "Lomwe", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "oc", + "speakers": 2040398, + "language_name": "Occitan", + "autonym": "Occitan", + "family": "Indo-European", + "flores_path": "oci_Latn", + "fleurs_tag": "oc_fr", + "commonvoice_hours": 1.8, + "commonvoice_locale": "oc", + "in_benchmark": true + }, + { + "bcp_47": "bgn", + "speakers": 2037382, + "language_name": "Western Balochi", + "autonym": 
"بلوچی (رخشانی)", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "lrc", + "speakers": 2020512, + "language_name": "Northern Luri", + "autonym": "لۊری شومالی", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "wbr", + "speakers": 1989135, + "language_name": "Wagdi", + "autonym": "Wagdi", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "tcy", + "speakers": 1989135, + "language_name": "Tulu", + "autonym": "Tulu", + "family": "Dravidian", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "khn", + "speakers": 1989135, + "language_name": "Khandesi", + "autonym": "Khandesi", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "tt", + "speakers": 1984108, + "language_name": "Tatar", + "autonym": "Татар", + "family": "Turkic", + "flores_path": "tat_Cyrl", + "fleurs_tag": null, + "commonvoice_hours": 32.0, + "commonvoice_locale": "tt", + "in_benchmark": true + }, + { + "bcp_47": "sl", + "speakers": 1973181, + "language_name": "Slovenian", + "autonym": "Slovenščina", + "family": "Indo-European", + "flores_path": "slv_Latn", + "fleurs_tag": "sl_si", + "commonvoice_hours": 18.0, + "commonvoice_locale": "sl", + "in_benchmark": true + }, + { + "bcp_47": "mak", + "speakers": 1949290, + "language_name": "Makasar", + "autonym": "Makasar", + "family": "Austronesian", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + 
"in_benchmark": false + }, + { + "bcp_47": "wal", + "speakers": 1946034, + "language_name": "Wolaytta", + "autonym": "Wolaytta", + "family": "Ta-Ne-Omotic", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "nym", + "speakers": 1932242, + "language_name": "Nyamwezi", + "autonym": "Nyamwezi", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "brx", + "speakers": 1856526, + "language_name": "Bodo", + "autonym": "बर’", + "family": "Sino-Tibetan", + "flores_path": "brx_Deva", + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": true + }, + { + "bcp_47": "ljp", + "speakers": 1842479, + "language_name": "Lampung Api", + "autonym": "Lampung Api", + "family": "Austronesian", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "cv", + "speakers": 1842386, + "language_name": "Chuvash", + "autonym": "Чӑваш", + "family": "Turkic", + "flores_path": "chv_Cyrl", + "fleurs_tag": null, + "commonvoice_hours": 27.0, + "commonvoice_locale": "cv", + "in_benchmark": true + }, + { + "bcp_47": "ba", + "speakers": 1842386, + "language_name": "Bashkir", + "autonym": "Башҡорт Теле", + "family": "Turkic", + "flores_path": "bak_Cyrl", + "fleurs_tag": null, + "commonvoice_hours": 259.0, + "commonvoice_locale": "ba", + "in_benchmark": true + }, + { + "bcp_47": "rhg", + "speakers": 1824082, + "language_name": "Rohingya", + "autonym": "𐴌𐴗𐴥𐴝𐴙𐴚𐴒𐴙𐴝", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "men", + "speakers": 1813083, + "language_name": "Mende", + "autonym": "Mende", + "family": "Mande", + 
"flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "tum", + "speakers": 1780514, + "language_name": "Tumbuka", + "autonym": "Tumbuka", + "family": "Atlantic-Congo", + "flores_path": "tum_Latn", + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": true + }, + { + "bcp_47": "tmh", + "speakers": 1776965, + "language_name": "Tamashek", + "autonym": "Tamashek", + "family": "Afro-Asiatic", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "nd", + "speakers": 1745556, + "language_name": "North Ndebele", + "autonym": "Isindebele", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 0.0, + "commonvoice_locale": "nd", + "in_benchmark": false + }, + { + "bcp_47": "mas", + "speakers": 1734738, + "language_name": "Masai", + "autonym": "Maa", + "family": "Nilotic", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "srr", + "speakers": 1731004, + "language_name": "Serer", + "autonym": "Serer", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "noe", + "speakers": 1723917, + "language_name": "Nimadi", + "autonym": "Nimadi", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "tem", + "speakers": 1722482, + "language_name": "Timne", + "autonym": "Timne", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "sco", + "speakers": 
1644028, + "language_name": "Scots", + "autonym": "Scots", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 0.0, + "commonvoice_locale": "sco", + "in_benchmark": false + }, + { + "bcp_47": "laj", + "speakers": 1643614, + "language_name": "Lango (Uganda)", + "autonym": "Lango (Uganda)", + "family": "Nilotic", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "rmt", + "speakers": 1613543, + "language_name": "Domari", + "autonym": "Domari", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "mk", + "speakers": 1608565, + "language_name": "Macedonian", + "autonym": "Македонски", + "family": "Indo-European", + "flores_path": "mkd_Cyrl", + "fleurs_tag": "mk_mk", + "commonvoice_hours": 19.0, + "commonvoice_locale": "mk", + "in_benchmark": true + }, + { + "bcp_47": "ach", + "speakers": 1600361, + "language_name": "Acoli", + "autonym": "Acoli", + "family": "Nilotic", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "fuq", + "speakers": 1594068, + "language_name": "Central-Eastern Niger Fulfulde", + "autonym": "Central-Eastern Niger Fulfulde", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "bhb", + "speakers": 1591308, + "language_name": "Bhili", + "autonym": "Bhili", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "pag", + "speakers": 1528534, + "language_name": "Pangasinan", + "autonym": "Pangasinan", + "family": "Austronesian", + "flores_path": 
"pag_Latn", + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": true + }, + { + "bcp_47": "kg", + "speakers": 1526700, + "language_name": "Kongo", + "autonym": "Kongo", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "bin", + "speakers": 1519599, + "language_name": "Bini", + "autonym": "Bini", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "ffm", + "speakers": 1505612, + "language_name": "Maasina Fulfulde", + "autonym": "Maasina Fulfulde", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "mni", + "speakers": 1476591, + "language_name": "Manipuri", + "autonym": "মৈতৈলোন্", + "family": "Sino-Tibetan", + "flores_path": "mni_Beng", + "fleurs_tag": null, + "commonvoice_hours": 0.0, + "commonvoice_locale": "mni", + "in_benchmark": true + }, + { + "bcp_47": "abr", + "speakers": 1467010, + "language_name": "Abron", + "autonym": "Abron", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "kde", + "speakers": 1463820, + "language_name": "Makonde", + "autonym": "Chimakonde", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "aln", + "speakers": 1430250, + "language_name": "Gheg Albanian", + "autonym": "Gheg Albanian", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 0.0, + "commonvoice_locale": "aln", + "in_benchmark": false + }, + { + "bcp_47": 
"ve", + "speakers": 1391759, + "language_name": "Venda", + "autonym": "Tshivenḓa", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 0.0, + "commonvoice_locale": "ve", + "in_benchmark": false + }, + { + "bcp_47": "seh", + "speakers": 1384517, + "language_name": "Sena", + "autonym": "Sena", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "vec", + "speakers": 1380829, + "language_name": "Venetian", + "autonym": "Veneto", + "family": "Indo-European", + "flores_path": "vec_Latn", + "fleurs_tag": null, + "commonvoice_hours": 0.0, + "commonvoice_locale": "vec", + "in_benchmark": true + }, + { + "bcp_47": "sus", + "speakers": 1378014, + "language_name": "Susu", + "autonym": "Susu", + "family": "Mande", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "nn", + "speakers": 1366860, + "language_name": "Norwegian Nynorsk", + "autonym": "Norsk Nynorsk", + "family": "Indo-European", + "flores_path": "nno_Latn", + "fleurs_tag": null, + "commonvoice_hours": 1.5, + "commonvoice_locale": "nn-NO", + "in_benchmark": true + }, + { + "bcp_47": "mgh", + "speakers": 1354419, + "language_name": "Makhuwa-Meetto", + "autonym": "Makua", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "raj", + "speakers": 1326090, + "language_name": "Rajasthani", + "autonym": "राजस्थानी", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "hoc", + "speakers": 1312829, + "language_name": "Ho", + "autonym": "Ho", + "family": "Austroasiatic", + "flores_path": null, + "fleurs_tag": null, 
+ "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "mdh", + "speakers": 1310172, + "language_name": "Maguindanaon", + "autonym": "Maguindanaon", + "family": "Austronesian", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "mtr", + "speakers": 1286307, + "language_name": "Mewari", + "autonym": "Mewari", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "bum", + "speakers": 1276270, + "language_name": "Bulu", + "autonym": "Bulu", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 9.5, + "commonvoice_locale": "bum", + "in_benchmark": false + }, + { + "bcp_47": "myx", + "speakers": 1254337, + "language_name": "Masaaba", + "autonym": "Masaaba", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "unr", + "speakers": 1252287, + "language_name": "Mundari", + "autonym": "Mundari", + "family": "Austroasiatic", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "mfe", + "speakers": 1241433, + "language_name": "Morisyen", + "autonym": "Kreol Morisien", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "ga", + "speakers": 1237487, + "language_name": "Irish", + "autonym": "Gaeilge", + "family": "Indo-European", + "flores_path": "gle_Latn", + "fleurs_tag": "ga_ie", + "commonvoice_hours": 6.2, + "commonvoice_locale": "ga-IE", + "in_benchmark": true + }, + { + "bcp_47": "fvr", + "speakers": 1230163, + 
"language_name": "Fur", + "autonym": "Fur", + "family": "Furan", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "rej", + "speakers": 1228320, + "language_name": "Rejang", + "autonym": "Rejang", + "family": "Austronesian", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "bhi", + "speakers": 1220003, + "language_name": "Bhilali", + "autonym": "Bhilali", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "tsg", + "speakers": 1200991, + "language_name": "Tausug", + "autonym": "Tausug", + "family": "Austronesian", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "quc", + "speakers": 1200731, + "language_name": "Kʼicheʼ", + "autonym": "KʼIcheʼ", + "family": "Mayan", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 0.0, + "commonvoice_locale": "quc", + "in_benchmark": false + }, + { + "bcp_47": "bqi", + "speakers": 1188926, + "language_name": "Bakhtiari", + "autonym": "Bakhtiari", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "kpe", + "speakers": 1186303, + "language_name": "Kpelle", + "autonym": "Kpɛlɛɛ", + "family": "Mande", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "sef", + "speakers": 1181687, + "language_name": "Cebaara Senoufo", + "autonym": "Cebaara Senoufo", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, 
+ "in_benchmark": false + }, + { + "bcp_47": "kxm", + "speakers": 1172616, + "language_name": "Northern Khmer", + "autonym": "Northern Khmer", + "family": "Austroasiatic", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "vls", + "speakers": 1172070, + "language_name": "West Flemish", + "autonym": "West Flemish", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "snk", + "speakers": 1153651, + "language_name": "Soninke", + "autonym": "Soninke", + "family": "Mande", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 0.0, + "commonvoice_locale": "snk", + "in_benchmark": false + }, + { + "bcp_47": "zza", + "speakers": 1148245, + "language_name": "Zaza", + "autonym": "Zaza", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 1.8, + "commonvoice_locale": "zza", + "in_benchmark": false + }, + { + "bcp_47": "lv", + "speakers": 1147550, + "language_name": "Latvian", + "autonym": "Latviešu", + "family": "Indo-European", + "flores_path": "lvs_Latn", + "fleurs_tag": "lv_lv", + "commonvoice_hours": 261.0, + "commonvoice_locale": "lv", + "in_benchmark": true + }, + { + "bcp_47": "dnj", + "speakers": 1099244, + "language_name": "Dan", + "autonym": "Dan", + "family": "Mande", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "gor", + "speakers": 1094807, + "language_name": "Gorontalo", + "autonym": "Gorontalo", + "family": "Austronesian", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "tig", + "speakers": 1094616, + "language_name": "Tigre", + "autonym": "Tigre", + "family": "Afro-Asiatic", + 
"flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 11.0, + "commonvoice_locale": "tig", + "in_benchmark": false + }, + { + "bcp_47": "eu", + "speakers": 1088519, + "language_name": "Basque", + "autonym": "Euskara", + "family": null, + "flores_path": "eus_Latn", + "fleurs_tag": null, + "commonvoice_hours": 336.0, + "commonvoice_locale": "eu", + "in_benchmark": true + }, + { + "bcp_47": "hoj", + "speakers": 1087394, + "language_name": "Hadothi", + "autonym": "Hadothi", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "kbd", + "speakers": 1070873, + "language_name": "Kabardian", + "autonym": "Kabardian", + "family": "Abkhaz-Adyge", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 27.0, + "commonvoice_locale": "kbd", + "in_benchmark": false + }, + { + "bcp_47": "kha", + "speakers": 1060872, + "language_name": "Khasi", + "autonym": "Khasi", + "family": "Austroasiatic", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "sc", + "speakers": 1060846, + "language_name": "Sardinian", + "autonym": "Sardu", + "family": "Indo-European", + "flores_path": "srd_Latn", + "fleurs_tag": null, + "commonvoice_hours": 2.9, + "commonvoice_locale": "sc", + "in_benchmark": true + }, + { + "bcp_47": "loz", + "speakers": 1045596, + "language_name": "Lozi", + "autonym": "Lozi", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "gur", + "speakers": 1026907, + "language_name": "Frafra", + "autonym": "Frafra", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "ars", + "speakers": 
1025205, + "language_name": "Najdi Arabic", + "autonym": "Najdi Arabic", + "family": "Afro-Asiatic", + "flores_path": "ars_Arab", + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": true + }, + { + "bcp_47": "rng", + "speakers": 1023339, + "language_name": "Ronga", + "autonym": "Ronga", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "luz", + "speakers": 1019080, + "language_name": "Southern Luri", + "autonym": "Southern Luri", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "new", + "speakers": 1000821, + "language_name": "Newari", + "autonym": "Newari", + "family": "Sino-Tibetan", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 0.0, + "commonvoice_locale": "new", + "in_benchmark": false + }, + { + "bcp_47": "tly", + "speakers": 1000168, + "language_name": "Talysh", + "autonym": "Talysh", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "yi", + "speakers": 997214, + "language_name": "Yiddish", + "autonym": "ייִדיש", + "family": "Indo-European", + "flores_path": "ydd_Hebr", + "fleurs_tag": null, + "commonvoice_hours": 0.5, + "commonvoice_locale": "yi", + "in_benchmark": true + }, + { + "bcp_47": "bez", + "speakers": 995398, + "language_name": "Bena", + "autonym": "Hibena", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "ksb", + "speakers": 995398, + "language_name": "Shambala", + "autonym": "Kishambaa", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + 
"commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "kfr", + "speakers": 994568, + "language_name": "Kachhi", + "autonym": "Kachhi", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "nij", + "speakers": 987996, + "language_name": "Ngaju", + "autonym": "Ngaju", + "family": "Austronesian", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "mwk", + "speakers": 977670, + "language_name": "Kita Maninkakan", + "autonym": "Kita Maninkakan", + "family": "Mande", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "jml", + "speakers": 970493, + "language_name": "Jumli", + "autonym": "Jumli", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "ryu", + "speakers": 966404, + "language_name": "Central Okinawan", + "autonym": "Central Okinawan", + "family": "Japonic", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "mnw", + "speakers": 966114, + "language_name": "Mon", + "autonym": "Mon", + "family": "Austroasiatic", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "qug", + "speakers": 963579, + "language_name": "Chimborazo Highland Quichua", + "autonym": "Chimborazo Highland Quichua", + "family": "Quechuan", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 0.0, + "commonvoice_locale": "qug", + "in_benchmark": false + }, + { + "bcp_47": "kac", + "speakers": 962032, + 
"language_name": "Kachin", + "autonym": "Kachin", + "family": "Sino-Tibetan", + "flores_path": "kac_Latn", + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": true + }, + { + "bcp_47": "hnd", + "speakers": 957354, + "language_name": "Southern Hindko", + "autonym": "Southern Hindko", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "li", + "speakers": 950422, + "language_name": "Limburgish", + "autonym": "Limburgish", + "family": "Indo-European", + "flores_path": "lim_Latn", + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": true + }, + { + "bcp_47": "mxc", + "speakers": 945510, + "language_name": "Manyika", + "autonym": "Manyika", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "ce", + "speakers": 935365, + "language_name": "Chechen", + "autonym": "Нохчийн", + "family": "Nakh-Daghestanian", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "kj", + "speakers": 920524, + "language_name": "Kuanyama", + "autonym": "Kuanyama", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "nr", + "speakers": 903418, + "language_name": "South Ndebele", + "autonym": "South Ndebele", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 0.0, + "commonvoice_locale": "nr", + "in_benchmark": false + }, + { + "bcp_47": "cy", + "speakers": 884910, + "language_name": "Welsh", + "autonym": "Cymraeg", + "family": "Indo-European", + "flores_path": "cym_Latn", + "fleurs_tag": 
"cy_gb", + "commonvoice_hours": 124.0, + "commonvoice_locale": "cy", + "in_benchmark": true + }, + { + "bcp_47": "ada", + "speakers": 880206, + "language_name": "Adangme", + "autonym": "Adangme", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "et", + "speakers": 878449, + "language_name": "Estonian", + "autonym": "Eesti", + "family": "Uralic", + "flores_path": "ekk_Latn", + "fleurs_tag": "et_ee", + "commonvoice_hours": 58.0, + "commonvoice_locale": "et", + "in_benchmark": true + }, + { + "bcp_47": "prd", + "speakers": 864342, + "language_name": "Parsi-Dari", + "autonym": "Parsi-Dari", + "family": "Bookkeeping", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "yua", + "speakers": 861955, + "language_name": "Yucateco", + "autonym": "Yucateco", + "family": "Mayan", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "ewo", + "speakers": 860095, + "language_name": "Ewondo", + "autonym": "Ewondo", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 19.0, + "commonvoice_locale": "ewo", + "in_benchmark": false + }, + { + "bcp_47": "kge", + "speakers": 854483, + "language_name": "Komering", + "autonym": "Komering", + "family": "Austronesian", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "ttj", + "speakers": 821807, + "language_name": "Tooro", + "autonym": "Tooro", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 0.0, + "commonvoice_locale": "ttj", + "in_benchmark": false + }, + { + "bcp_47": "grt", + "speakers": 821563, + "language_name": "Garo", + 
"autonym": "Garo", + "family": "Sino-Tibetan", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "gaa", + "speakers": 821526, + "language_name": "Ga", + "autonym": "Gã", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "mgy", + "speakers": 819739, + "language_name": "Mbunga", + "autonym": "Mbunga", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "tet", + "speakers": 816395, + "language_name": "Tetum", + "autonym": "Tetum", + "family": "Austronesian", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "iba", + "speakers": 816302, + "language_name": "Iban", + "autonym": "Iban", + "family": "Bookkeeping", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "ebu", + "speakers": 802918, + "language_name": "Embu", + "autonym": "Kĩembu", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "swg", + "speakers": 801597, + "language_name": "Swabian", + "autonym": "Swabian", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "hnj", + "speakers": 781687, + "language_name": "Hmong Njua", + "autonym": "𞄀𞄄𞄰𞄩𞄍𞄜𞄰", + "family": "Hmong-Mien", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": 
"kck", + "speakers": 770954, + "language_name": "Kalanga", + "autonym": "Kalanga", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "dty", + "speakers": 758198, + "language_name": "Dotyali", + "autonym": "Dotyali", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "pcd", + "speakers": 746330, + "language_name": "Picard", + "autonym": "Picard", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "fy", + "speakers": 743057, + "language_name": "Western Frisian", + "autonym": "Frysk", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 66.0, + "commonvoice_locale": "fy-NL", + "in_benchmark": false + }, + { + "bcp_47": "ccp", + "speakers": 729137, + "language_name": "Chakma", + "autonym": "𑄌𑄋𑄴𑄟𑄳𑄦", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "yao", + "speakers": 722357, + "language_name": "Yao", + "autonym": "Yao", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "aoz", + "speakers": 720970, + "language_name": "Uab Meto", + "autonym": "Uab Meto", + "family": "Austronesian", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "cps", + "speakers": 720595, + "language_name": "Capiznon", + "autonym": "Capiznon", + "family": "Austronesian", + "flores_path": null, + "fleurs_tag": null, + 
"commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "asa", + "speakers": 702634, + "language_name": "Asu", + "autonym": "Kipare", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "blt", + "speakers": 681177, + "language_name": "Tai Dam", + "autonym": "ꪼꪕꪒꪾ", + "family": "Tai-Kadai", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "wa", + "speakers": 679801, + "language_name": "Walloon", + "autonym": "Walon", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "ses", + "speakers": 664816, + "language_name": "Koyraboro Senni", + "autonym": "Koyraboro Senni", + "family": "Songhay", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "bfy", + "speakers": 654424, + "language_name": "Bagheli", + "autonym": "Bagheli", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "ast", + "speakers": 650205, + "language_name": "Asturian", + "autonym": "Asturianu", + "family": "Indo-European", + "flores_path": "ast_Latn", + "fleurs_tag": "ast_es", + "commonvoice_hours": 0.8, + "commonvoice_locale": "ast", + "in_benchmark": true + }, + { + "bcp_47": "lki", + "speakers": 645417, + "language_name": "Laki", + "autonym": "Laki", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "unx", + "speakers": 636523, + "language_name": "Munda", + 
"autonym": "Munda", + "family": "Bookkeeping", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "lis", + "speakers": 627309, + "language_name": "Lisu", + "autonym": "Lisu", + "family": "Sino-Tibetan", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "nqo", + "speakers": 626370, + "language_name": "N’Ko", + "autonym": "ߒߞߏ", + "family": "Artificial Language", + "flores_path": "nqo_Nkoo", + "fleurs_tag": null, + "commonvoice_hours": 0.0, + "commonvoice_locale": "nqo", + "in_benchmark": true + }, + { + "bcp_47": "gos", + "speakers": 622094, + "language_name": "Gronings", + "autonym": "Gronings", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 0.0, + "commonvoice_locale": "gos", + "in_benchmark": false + }, + { + "bcp_47": "lol", + "speakers": 620858, + "language_name": "Mongo", + "autonym": "Mongo", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "thl", + "speakers": 606558, + "language_name": "Dangaura Tharu", + "autonym": "Dangaura Tharu", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "nap", + "speakers": 605306, + "language_name": "Neapolitan", + "autonym": "Neapolitan", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "nus", + "speakers": 591427, + "language_name": "Nuer", + "autonym": "Thok Nath", + "family": "Nilotic", + "flores_path": "nus_Latn", + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + 
"in_benchmark": true + }, + { + "bcp_47": "br", + "speakers": 563140, + "language_name": "Breton", + "autonym": "Brezhoneg", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 27.0, + "commonvoice_locale": "br", + "in_benchmark": false + }, + { + "bcp_47": "pis", + "speakers": 561780, + "language_name": "Pijin", + "autonym": "Pijin", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "rcf", + "speakers": 559185, + "language_name": "Réunion Creole French", + "autonym": "Réunion Creole French", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "av", + "speakers": 552716, + "language_name": "Avaric", + "autonym": "Avaric", + "family": "Nakh-Daghestanian", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "ng", + "speakers": 552315, + "language_name": "Ndonga", + "autonym": "Ndonga", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "os", + "speakers": 541444, + "language_name": "Ossetic", + "autonym": "Ирон", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 0.9, + "commonvoice_locale": "os", + "in_benchmark": false + }, + { + "bcp_47": "udm", + "speakers": 538544, + "language_name": "Udmurt", + "autonym": "Udmurt", + "family": "Uralic", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 0.0, + "commonvoice_locale": "udm", + "in_benchmark": false + }, + { + "bcp_47": "lij", + "speakers": 536663, + "language_name": "Ligurian", + "autonym": "Ligure", + "family": "Indo-European", + 
"flores_path": "lij_Latn", + "fleurs_tag": null, + "commonvoice_hours": 5.1, + "commonvoice_locale": "lij", + "in_benchmark": true + }, + { + "bcp_47": "kea", + "speakers": 530762, + "language_name": "Kabuverdianu", + "autonym": "Kabuverdianu", + "family": "Indo-European", + "flores_path": "kea_Latn", + "fleurs_tag": "kea_cv", + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": true + }, + { + "bcp_47": "rue", + "speakers": 527075, + "language_name": "Rusyn", + "autonym": "Rusyn", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "chm", + "speakers": 524371, + "language_name": "Mari", + "autonym": "Mari", + "family": "Uralic", + "flores_path": "mhr_Cyrl", + "fleurs_tag": null, + "commonvoice_hours": 282.0, + "commonvoice_locale": "mhr", + "in_benchmark": true + }, + { + "bcp_47": "scn", + "speakers": 511702, + "language_name": "Sicilian", + "autonym": "Sicilianu", + "family": "Indo-European", + "flores_path": "scn_Latn", + "fleurs_tag": null, + "commonvoice_hours": 0.0, + "commonvoice_locale": "scn", + "in_benchmark": true + }, + { + "bcp_47": "lag", + "speakers": 509409, + "language_name": "Langi", + "autonym": "Kɨlaangi", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "bft", + "speakers": 502520, + "language_name": "Balti", + "autonym": "Balti", + "family": "Sino-Tibetan", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 18.0, + "commonvoice_locale": "bft", + "in_benchmark": false + }, + { + "bcp_47": "nhe", + "speakers": 501735, + "language_name": "Eastern Huasteca Nahuatl", + "autonym": "Eastern Huasteca Nahuatl", + "family": "Uto-Aztecan", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 0.0, + "commonvoice_locale": "nhe", + "in_benchmark": 
false + }, + { + "bcp_47": "nhw", + "speakers": 501735, + "language_name": "Western Huasteca Nahuatl", + "autonym": "Western Huasteca Nahuatl", + "family": "Uto-Aztecan", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "szl", + "speakers": 497670, + "language_name": "Silesian", + "autonym": "Ślōnski", + "family": "Indo-European", + "flores_path": "szl_Latn", + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": true + }, + { + "bcp_47": "kaa", + "speakers": 489046, + "language_name": "Kara-Kalpak", + "autonym": "Kara-Kalpak", + "family": "Turkic", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 0.0, + "commonvoice_locale": "kaa", + "in_benchmark": false + }, + { + "bcp_47": "gju", + "speakers": 467002, + "language_name": "Gujari", + "autonym": "Gujari", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 11.0, + "commonvoice_locale": "gju", + "in_benchmark": false + }, + { + "bcp_47": "srx", + "speakers": 464132, + "language_name": "Sirmauri", + "autonym": "Sirmauri", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "mt", + "speakers": 457267, + "language_name": "Maltese", + "autonym": "Malti", + "family": "Afro-Asiatic", + "flores_path": "mlt_Latn", + "fleurs_tag": "mt_mt", + "commonvoice_hours": 8.7, + "commonvoice_locale": "mt", + "in_benchmark": true + }, + { + "bcp_47": "bap", + "speakers": 454918, + "language_name": "Bantawa", + "autonym": "Bantawa", + "family": "Sino-Tibetan", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "sah", + "speakers": 453510, + "language_name": "Yakut", + "autonym": "Саха Тыла", + "family": 
"Turkic", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 11.0, + "commonvoice_locale": "sah", + "in_benchmark": false + }, + { + "bcp_47": "mls", + "speakers": 451060, + "language_name": "Masalit", + "autonym": "Masalit", + "family": "Maban", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "kaj", + "speakers": 449459, + "language_name": "Jju", + "autonym": "Kaje", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "ady", + "speakers": 444583, + "language_name": "Adyghe", + "autonym": "Adyghe", + "family": "Abkhaz-Adyge", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 12.0, + "commonvoice_locale": "ady", + "in_benchmark": false + }, + { + "bcp_47": "ybb", + "speakers": 443920, + "language_name": "Yemba", + "autonym": "Yemba", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "xmf", + "speakers": 439670, + "language_name": "Mingrelian", + "autonym": "Mingrelian", + "family": "Kartvelian", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 11.0, + "commonvoice_locale": "xmf", + "in_benchmark": false + }, + { + "bcp_47": "myv", + "speakers": 439338, + "language_name": "Erzya", + "autonym": "Эрзянь Кель", + "family": "Uralic", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 3.8, + "commonvoice_locale": "myv", + "in_benchmark": false + }, + { + "bcp_47": "dav", + "speakers": 438929, + "language_name": "Taita", + "autonym": "Kitaita", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 9.3, + "commonvoice_locale": "dav", + "in_benchmark": false + }, + { + "bcp_47": "maz", + "speakers": 437410, + 
"language_name": "Central Mazahua", + "autonym": "Central Mazahua", + "family": "Otomanguean", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "vun", + "speakers": 433291, + "language_name": "Vunjo", + "autonym": "Kyivunjo", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "rof", + "speakers": 433291, + "language_name": "Rombo", + "autonym": "Kihorombo", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 0.0, + "commonvoice_locale": "rof", + "in_benchmark": false + }, + { + "bcp_47": "jmc", + "speakers": 433291, + "language_name": "Machame", + "autonym": "Kimachame", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "kjg", + "speakers": 431949, + "language_name": "Khmu", + "autonym": "Khmu", + "family": "Austroasiatic", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "fan", + "speakers": 426451, + "language_name": "Fang", + "autonym": "Fang", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 9.3, + "commonvoice_locale": "fan", + "in_benchmark": false + }, + { + "bcp_47": "krj", + "speakers": 425806, + "language_name": "Kinaray-a", + "autonym": "Kinaray-A", + "family": "Austronesian", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "kdt", + "speakers": 421207, + "language_name": "Kuy", + "autonym": "Kuy", + "family": "Austroasiatic", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + 
"commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "lb", + "speakers": 421015, + "language_name": "Luxembourgish", + "autonym": "Lëtzebuergesch", + "family": "Indo-European", + "flores_path": "ltz_Latn", + "fleurs_tag": "lb_lu", + "commonvoice_hours": 0.0, + "commonvoice_locale": "lb", + "in_benchmark": true + }, + { + "bcp_47": "srn", + "speakers": 414507, + "language_name": "Sranan Tongo", + "autonym": "Sranan Tongo", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "dyo", + "speakers": 409146, + "language_name": "Jola-Fonyi", + "autonym": "Joola", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "tdg", + "speakers": 394263, + "language_name": "Western Tamang", + "autonym": "Western Tamang", + "family": "Sino-Tibetan", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "pnt", + "speakers": 392463, + "language_name": "Pontic", + "autonym": "Pontic", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "zmi", + "speakers": 391825, + "language_name": "Negeri Sembilan Malay", + "autonym": "Negeri Sembilan Malay", + "family": "Austronesian", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "nnh", + "speakers": 388430, + "language_name": "Ngiemboon", + "autonym": "Shwóŋò Ngiembɔɔn", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 23.0, + "commonvoice_locale": "nnh", + "in_benchmark": false + }, + { + "bcp_47": "bbj", + 
"speakers": 388430, + "language_name": "Ghomala", + "autonym": "Ghomala", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 13.0, + "commonvoice_locale": "bbj", + "in_benchmark": false + }, + { + "bcp_47": "dv", + "speakers": 388044, + "language_name": "Divehi", + "autonym": "Divehi", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 39.0, + "commonvoice_locale": "dv", + "in_benchmark": false + }, + { + "bcp_47": "saz", + "speakers": 384566, + "language_name": "Saurashtra", + "autonym": "Saurashtra", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "hif", + "speakers": 383749, + "language_name": "Fiji Hindi", + "autonym": "Fiji Hindi", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "fia", + "speakers": 378161, + "language_name": "Nobiin", + "autonym": "Nobiin", + "family": "Nubian", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "bgx", + "speakers": 377280, + "language_name": "Balkan Gagauz Turkish", + "autonym": "Balkan Gagauz Turkish", + "family": "Turkic", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "kvr", + "speakers": 373836, + "language_name": "Kerinci", + "autonym": "Kerinci", + "family": "Austronesian", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "kvx", + "speakers": 373602, + "language_name": "Parkari Koli", + "autonym": "Parkari Koli", + "family": "Indo-European", + "flores_path": null, + 
"fleurs_tag": null, + "commonvoice_hours": 0.0, + "commonvoice_locale": "kvx", + "in_benchmark": false + }, + { + "bcp_47": "dz", + "speakers": 370341, + "language_name": "Dzongkha", + "autonym": "རྫོང་ཁ", + "family": "Bookkeeping", + "flores_path": "dzo_Tibt", + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": true + }, + { + "bcp_47": "pko", + "speakers": 369343, + "language_name": "Pökoot", + "autonym": "Pökoot", + "family": "Nilotic", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "dar", + "speakers": 368477, + "language_name": "Dargwa", + "autonym": "Dargwa", + "family": "Nakh-Daghestanian", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 0.0, + "commonvoice_locale": "dar", + "in_benchmark": false + }, + { + "bcp_47": "lif", + "speakers": 368085, + "language_name": "Limbu", + "autonym": "Limbu", + "family": "Sino-Tibetan", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "fj", + "speakers": 365030, + "language_name": "Fijian", + "autonym": "Fijian", + "family": "Austronesian", + "flores_path": "fij_Latn", + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": true + }, + { + "bcp_47": "thr", + "speakers": 363935, + "language_name": "Rana Tharu", + "autonym": "Rana Tharu", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "bkm", + "speakers": 360685, + "language_name": "Kom", + "autonym": "Kom", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 13.0, + "commonvoice_locale": "bkm", + "in_benchmark": false + }, + { + "bcp_47": "is", + "speakers": 350734, + "language_name": 
"Icelandic", + "autonym": "Íslenska", + "family": "Indo-European", + "flores_path": "isl_Latn", + "fleurs_tag": "is_is", + "commonvoice_hours": 0.1, + "commonvoice_locale": "is", + "in_benchmark": true + }, + { + "bcp_47": "khw", + "speakers": 350252, + "language_name": "Khowar", + "autonym": "Khowar", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 18.0, + "commonvoice_locale": "khw", + "in_benchmark": false + }, + { + "bcp_47": "lbw", + "speakers": 347134, + "language_name": "Tolaki", + "autonym": "Tolaki", + "family": "Austronesian", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "nxq", + "speakers": 334565, + "language_name": "Naxi", + "autonym": "Naxi", + "family": "Sino-Tibetan", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "mgp", + "speakers": 333607, + "language_name": "Eastern Magar", + "autonym": "Eastern Magar", + "family": "Sino-Tibetan", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "bax", + "speakers": 332940, + "language_name": "Bamun", + "autonym": "Bamun", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 11.0, + "commonvoice_locale": "bax", + "in_benchmark": false + }, + { + "bcp_47": "bas", + "speakers": 332940, + "language_name": "Basaa", + "autonym": "Ɓàsàa", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 12.0, + "commonvoice_locale": "bas", + "in_benchmark": false + }, + { + "bcp_47": "khq", + "speakers": 332408, + "language_name": "Koyra Chiini", + "autonym": "Koyra Ciini", + "family": "Songhay", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": 
null, + "in_benchmark": false + }, + { + "bcp_47": "mvy", + "speakers": 326901, + "language_name": "Indus Kohistani", + "autonym": "Indus Kohistani", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 23.0, + "commonvoice_locale": "mvy", + "in_benchmark": false + }, + { + "bcp_47": "gay", + "speakers": 320431, + "language_name": "Gayo", + "autonym": "Gayo", + "family": "Austronesian", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "zdj", + "speakers": 313124, + "language_name": "Ngazidja Comorian", + "autonym": "Ngazidja Comorian", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "bua", + "speakers": 311788, + "language_name": "Buriat", + "autonym": "Buriat", + "family": "Mongolic-Khitan", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 0.0, + "commonvoice_locale": "bxr", + "in_benchmark": false + }, + { + "bcp_47": "bto", + "speakers": 305707, + "language_name": "Rinconada Bikol", + "autonym": "Rinconada Bikol", + "family": "Austronesian", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "byv", + "speakers": 305195, + "language_name": "Medumba", + "autonym": "Medumba", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 14.0, + "commonvoice_locale": "byv", + "in_benchmark": false + }, + { + "bcp_47": "njo", + "speakers": 305001, + "language_name": "Ao Naga", + "autonym": "Ao Naga", + "family": "Sino-Tibetan", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "bfq", + "speakers": 305001, + "language_name": "Badaga", + "autonym": 
"Badaga", + "family": "Dravidian", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "thq", + "speakers": 303279, + "language_name": "Kochila Tharu", + "autonym": "Kochila Tharu", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "mdf", + "speakers": 297616, + "language_name": "Moksha", + "autonym": "Мокшень Кяль", + "family": "Uralic", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 0.5, + "commonvoice_locale": "mdf", + "in_benchmark": false + }, + { + "bcp_47": "rob", + "speakers": 293729, + "language_name": "Tae'", + "autonym": "Tae'", + "family": "Austronesian", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "nzi", + "speakers": 293402, + "language_name": "Nzima", + "autonym": "Nzima", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "naq", + "speakers": 289308, + "language_name": "Nama", + "autonym": "Khoekhoegowab", + "family": "Khoe-Kwadi", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "wni", + "speakers": 287736, + "language_name": "Ndzwani Comorian", + "autonym": "Ndzwani Comorian", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "kum", + "speakers": 283444, + "language_name": "Kumyk", + "autonym": "Kumyk", + "family": "Turkic", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + 
}, + { + "bcp_47": "mua", + "speakers": 277450, + "language_name": "Mundang", + "autonym": "Mundaŋ", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 0.0, + "commonvoice_locale": "mua", + "in_benchmark": false + }, + { + "bcp_47": "arn", + "speakers": 272802, + "language_name": "Mapuche", + "autonym": "Mapudungun", + "family": "Araucanian", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 0.0, + "commonvoice_locale": "arn", + "in_benchmark": false + }, + { + "bcp_47": "cja", + "speakers": 270832, + "language_name": "Western Cham", + "autonym": "Western Cham", + "family": "Austronesian", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "bi", + "speakers": 268500, + "language_name": "Bislama", + "autonym": "Bislama", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "khb", + "speakers": 264864, + "language_name": "Lü", + "autonym": "Lü", + "family": "Tai-Kadai", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "tdd", + "speakers": 264864, + "language_name": "Tai Nüa", + "autonym": "Tai Nüa", + "family": "Tai-Kadai", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "kxp", + "speakers": 256851, + "language_name": "Wadiyara Koli", + "autonym": "Wadiyara Koli", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 0.0, + "commonvoice_locale": "kxp", + "in_benchmark": false + }, + { + "bcp_47": "gjk", + "speakers": 256851, + "language_name": "Kachi Koli", + "autonym": "Kachi Koli", + "family": "Indo-European", + "flores_path": null, + 
"fleurs_tag": null, + "commonvoice_hours": 0.0, + "commonvoice_locale": "gjk", + "in_benchmark": false + }, + { + "bcp_47": "lez", + "speakers": 255100, + "language_name": "Lezghian", + "autonym": "Lezghian", + "family": "Nakh-Daghestanian", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "kv", + "speakers": 255100, + "language_name": "Komi", + "autonym": "Komi", + "family": "Uralic", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 0.0, + "commonvoice_locale": "kpv", + "in_benchmark": false + }, + { + "bcp_47": "sm", + "speakers": 252717, + "language_name": "Samoan", + "autonym": "Samoan", + "family": "Austronesian", + "flores_path": "smo_Latn", + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": true + }, + { + "bcp_47": "mrd", + "speakers": 251722, + "language_name": "Western Magar", + "autonym": "Western Magar", + "family": "Sino-Tibetan", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "saq", + "speakers": 246228, + "language_name": "Samburu", + "autonym": "Kisampur", + "family": "Nilotic", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "crh", + "speakers": 245968, + "language_name": "Crimean Tatar", + "autonym": "Crimean Tatar", + "family": "Turkic", + "flores_path": "crh_Latn", + "fleurs_tag": null, + "commonvoice_hours": 0.0, + "commonvoice_locale": "crh", + "in_benchmark": true + }, + { + "bcp_47": "mdr", + "speakers": 245664, + "language_name": "Mandar", + "autonym": "Mandar", + "family": "Austronesian", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "sxn", + "speakers": 245664, + 
"language_name": "Sangir", + "autonym": "Sangir", + "family": "Austronesian", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "ext", + "speakers": 245077, + "language_name": "Extremaduran", + "autonym": "Extremaduran", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "nch", + "speakers": 244435, + "language_name": "Central Huasteca Nahuatl", + "autonym": "Central Huasteca Nahuatl", + "family": "Uto-Aztecan", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "zea", + "speakers": 241926, + "language_name": "Zeelandic", + "autonym": "Zeelandic", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "krc", + "speakers": 240927, + "language_name": "Karachay-Balkar", + "autonym": "Karachay-Balkar", + "family": "Turkic", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 0.0, + "commonvoice_locale": "krc", + "in_benchmark": false + }, + { + "bcp_47": "ksh", + "speakers": 240479, + "language_name": "Colognian", + "autonym": "Kölsch", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "hz", + "speakers": 239336, + "language_name": "Herero", + "autonym": "Herero", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "sav", + "speakers": 236046, + "language_name": "Saafi-Saafi", + "autonym": "Saafi-Saafi", + "family": "Atlantic-Congo", + "flores_path": null, + 
"fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "zag", + "speakers": 232364, + "language_name": "Zaghawa", + "autonym": "Zaghawa", + "family": "Saharan", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "inh", + "speakers": 226755, + "language_name": "Ingush", + "autonym": "Ingush", + "family": "Nakh-Daghestanian", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "ssy", + "speakers": 218923, + "language_name": "Saho", + "autonym": "Saho", + "family": "Afro-Asiatic", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "dtm", + "speakers": 215087, + "language_name": "Tomo Kan Dogon", + "autonym": "Tomo Kan Dogon", + "family": "Dogon", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "pap", + "speakers": 211640, + "language_name": "Papiamento", + "autonym": "Papiamentu", + "family": "Indo-European", + "flores_path": "pap_Latn", + "fleurs_tag": null, + "commonvoice_hours": 0.0, + "commonvoice_locale": "pap-AW", + "in_benchmark": true + }, + { + "bcp_47": "syr", + "speakers": 210659, + "language_name": "Syriac", + "autonym": "ܣܘܪܝܝܐ", + "family": "Afro-Asiatic", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 0.0, + "commonvoice_locale": "syr", + "in_benchmark": false + }, + { + "bcp_47": "tog", + "speakers": 207727, + "language_name": "Nyasa Tonga", + "autonym": "Nyasa Tonga", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "maf", + "speakers": 205313, + 
"language_name": "Mafa", + "autonym": "Mafa", + "family": "Afro-Asiatic", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "puu", + "speakers": 200782, + "language_name": "Punu", + "autonym": "Punu", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "kcg", + "speakers": 199046, + "language_name": "Tyap", + "autonym": "Katab", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "kao", + "speakers": 195534, + "language_name": "Xaasongaxango", + "autonym": "Xaasongaxango", + "family": "Mande", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "tyv", + "speakers": 184239, + "language_name": "Tuvinian", + "autonym": "Tuvinian", + "family": "Turkic", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 0.0, + "commonvoice_locale": "tyv", + "in_benchmark": false + }, + { + "bcp_47": "dtp", + "speakers": 182852, + "language_name": "Central Dusun", + "autonym": "Central Dusun", + "family": "Austronesian", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "ria", + "speakers": 172392, + "language_name": "Riang (India)", + "autonym": "Riang (India)", + "family": "Sino-Tibetan", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "swb", + "speakers": 170720, + "language_name": "Comorian", + "autonym": "Comorian", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + 
"commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "bmq", + "speakers": 168159, + "language_name": "Bomu", + "autonym": "Bomu", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "ltg", + "speakers": 167429, + "language_name": "Latgalian", + "autonym": "Latgalian", + "family": "Indo-European", + "flores_path": "ltg_Latn", + "fleurs_tag": null, + "commonvoice_hours": 29.0, + "commonvoice_locale": "ltg", + "in_benchmark": true + }, + { + "bcp_47": "nv", + "speakers": 166320, + "language_name": "Navajo", + "autonym": "Diné Bizaad", + "family": "Athabaskan-Eyak-Tlingit", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "bze", + "speakers": 166204, + "language_name": "Jenaama Bozo", + "autonym": "Jenaama Bozo", + "family": "Mande", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "co", + "speakers": 162836, + "language_name": "Corsican", + "autonym": "Corsu", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 0.0, + "commonvoice_locale": "co", + "in_benchmark": false + }, + { + "bcp_47": "bfd", + "speakers": 158146, + "language_name": "Bafut", + "autonym": "Bafut", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 11.0, + "commonvoice_locale": "bfd", + "in_benchmark": false + }, + { + "bcp_47": "xsr", + "speakers": 157705, + "language_name": "Sherpa", + "autonym": "Sherpa", + "family": "Sino-Tibetan", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "kiu", + "speakers": 155833, + "language_name": "Kirmanjki", + "autonym": "Kirmanjki", + 
"family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "ho", + "speakers": 152449, + "language_name": "Hiri Motu", + "autonym": "Hiri Motu", + "family": "Pidgin", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "kkj", + "speakers": 149823, + "language_name": "Kako", + "autonym": "Kakɔ", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "bss", + "speakers": 149823, + "language_name": "Akoose", + "autonym": "Akoose", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "sly", + "speakers": 144194, + "language_name": "Selayar", + "autonym": "Selayar", + "family": "Austronesian", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "mi", + "speakers": 137913, + "language_name": "Māori", + "autonym": "Māori", + "family": "Austronesian", + "flores_path": "mri_Latn", + "fleurs_tag": "mi_nz", + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": true + }, + { + "bcp_47": "rjs", + "speakers": 133443, + "language_name": "Rajbanshi", + "autonym": "Rajbanshi", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "dua", + "speakers": 133176, + "language_name": "Duala", + "autonym": "Duálá", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 14.0, + "commonvoice_locale": "dua", + "in_benchmark": false + }, + { + "bcp_47": 
"guc", + "speakers": 132529, + "language_name": "Wayuu", + "autonym": "Wayuu", + "family": "Arawakan", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 0.0, + "commonvoice_locale": "guc", + "in_benchmark": false + }, + { + "bcp_47": "vai", + "speakers": 131906, + "language_name": "Vai", + "autonym": "ꕙꔤ", + "family": "Mande", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "taj", + "speakers": 130410, + "language_name": "Eastern Tamang", + "autonym": "Eastern Tamang", + "family": "Sino-Tibetan", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "mgo", + "speakers": 130401, + "language_name": "Metaʼ", + "autonym": "Metaʼ", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "pdc", + "speakers": 129729, + "language_name": "Pennsylvania German", + "autonym": "Pennsylvania German", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "rwk", + "speakers": 128816, + "language_name": "Rwa", + "autonym": "Kiruwa", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "trw", + "speakers": 123756, + "language_name": "Torwali", + "autonym": "توروالی", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 19.0, + "commonvoice_locale": "trw", + "in_benchmark": false + }, + { + "bcp_47": "mfv", + "speakers": 121170, + "language_name": "Mandjak", + "autonym": "Mandjak", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + 
"commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "tsj", + "speakers": 117348, + "language_name": "Tshangla", + "autonym": "Tshangla", + "family": "Sino-Tibetan", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "sbp", + "speakers": 117106, + "language_name": "Sangu", + "autonym": "Ishisangu", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "krl", + "speakers": 116212, + "language_name": "Karelian", + "autonym": "Karelian", + "family": "Uralic", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "lad", + "speakers": 112781, + "language_name": "Ladino", + "autonym": "Ladino", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "ife", + "speakers": 111910, + "language_name": "Ifè", + "autonym": "Ifè", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "gag", + "speakers": 111028, + "language_name": "Gagauz", + "autonym": "Gagauz", + "family": "Turkic", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "lbe", + "speakers": 110543, + "language_name": "Lak", + "autonym": "Lak", + "family": "Nakh-Daghestanian", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "sdc", + "speakers": 106085, + "language_name": "Sassarese Sardinian", + "autonym": "Sassarese 
Sardinian", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "to", + "speakers": 100790, + "language_name": "Tongan", + "autonym": "Lea Fakatonga", + "family": "Austronesian", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "bjt", + "speakers": 95992, + "language_name": "Balanta-Ganja", + "autonym": "Balanta-Ganja", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "jgo", + "speakers": 94333, + "language_name": "Ngomba", + "autonym": "Ndaꞌa", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 0.0, + "commonvoice_locale": "jgo", + "in_benchmark": false + }, + { + "bcp_47": "crs", + "speakers": 94061, + "language_name": "Seselwa Creole French", + "autonym": "Seselwa Creole French", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "ab", + "speakers": 91953, + "language_name": "Abkhazian", + "autonym": "Аԥсшәа", + "family": "Abkhaz-Adyge", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 67.0, + "commonvoice_locale": "ab", + "in_benchmark": false + }, + { + "bcp_47": "ty", + "speakers": 91488, + "language_name": "Tahitian", + "autonym": "Tahitian", + "family": "Austronesian", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 0.0, + "commonvoice_locale": "ty", + "in_benchmark": false + }, + { + "bcp_47": "iu", + "speakers": 90466, + "language_name": "Inuktitut", + "autonym": "Inuktitut", + "family": "Eskimo-Aleut", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + 
"commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "pdt", + "speakers": 90466, + "language_name": "Plautdietsch", + "autonym": "Plautdietsch", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "bpy", + "speakers": 90174, + "language_name": "Bishnupriya", + "autonym": "Bishnupriya", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "ksf", + "speakers": 88784, + "language_name": "Bafia", + "autonym": "Rikpa", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 20.0, + "commonvoice_locale": "ksf", + "in_benchmark": false + }, + { + "bcp_47": "gvr", + "speakers": 87951, + "language_name": "Gurung", + "autonym": "Gurung", + "family": "Sino-Tibetan", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "cjm", + "speakers": 87862, + "language_name": "Eastern Cham", + "autonym": "Eastern Cham", + "family": "Austronesian", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "lcp", + "speakers": 87751, + "language_name": "Western Lawa", + "autonym": "Western Lawa", + "family": "Austroasiatic", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "knf", + "speakers": 83151, + "language_name": "Mankanya", + "autonym": "Mankanya", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "lep", + "speakers": 79743, + "language_name": "Lepcha", + "autonym": 
"Lepcha", + "family": "Sino-Tibetan", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "byn", + "speakers": 79056, + "language_name": "Blin", + "autonym": "Blin", + "family": "Afro-Asiatic", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "btv", + "speakers": 78843, + "language_name": "Bateri", + "autonym": "Bateri", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 0.0, + "commonvoice_locale": "btv", + "in_benchmark": false + }, + { + "bcp_47": "tkt", + "speakers": 72787, + "language_name": "Kathoriya Tharu", + "autonym": "Kathoriya Tharu", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "gd", + "speakers": 72337, + "language_name": "Scottish Gaelic", + "autonym": "Gàidhlig", + "family": "Indo-European", + "flores_path": "gla_Latn", + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": true + }, + { + "bcp_47": "fo", + "speakers": 71351, + "language_name": "Faroese", + "autonym": "Føroyskt", + "family": "Indo-European", + "flores_path": "fao_Latn", + "fleurs_tag": null, + "commonvoice_hours": 0.0, + "commonvoice_locale": "fo", + "in_benchmark": true + }, + { + "bcp_47": "vro", + "speakers": 70031, + "language_name": "Võro", + "autonym": "Võro", + "family": "Uralic", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "ken", + "speakers": 69362, + "language_name": "Kenyang", + "autonym": "Kɛnyaŋ", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + 
}, + { + "bcp_47": "gil", + "speakers": 67078, + "language_name": "Gilbertese", + "autonym": "Gilbertese", + "family": "Austronesian", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "bvb", + "speakers": 66058, + "language_name": "Bube", + "autonym": "Bube", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "mwv", + "speakers": 64086, + "language_name": "Mentawai", + "autonym": "Mentawai", + "family": "Austronesian", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "frp", + "speakers": 63777, + "language_name": "Arpitan", + "autonym": "Arpitan", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "koi", + "speakers": 63775, + "language_name": "Komi-Permyak", + "autonym": "Komi-Permyak", + "family": "Uralic", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "kfo", + "speakers": 63207, + "language_name": "Koro", + "autonym": "Koro", + "family": "Mande", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "mh", + "speakers": 56879, + "language_name": "Marshallese", + "autonym": "Marshallese", + "family": "Austronesian", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "fit", + "speakers": 56114, + "language_name": "Tornedalen Finnish", + "autonym": "Tornedalen Finnish", + "family": "Uralic", + "flores_path": null, + "fleurs_tag": 
null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "kl", + "speakers": 55440, + "language_name": "Kalaallisut", + "autonym": "Kalaallisut", + "family": "Eskimo-Aleut", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "bra", + "speakers": 54370, + "language_name": "Braj", + "autonym": "Braj", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "gcr", + "speakers": 51872, + "language_name": "Guianese Creole French", + "autonym": "Guianese Creole French", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "se", + "speakers": 51530, + "language_name": "Northern Sami", + "autonym": "Davvisámegiella", + "family": "Uralic", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "blo", + "speakers": 51507, + "language_name": "Anii", + "autonym": "Anii Kagɩja", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "kgp", + "speakers": 50812, + "language_name": "Kaingang", + "autonym": "Kanhgág", + "family": "Nuclear-Macro-Je", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "csb", + "speakers": 49767, + "language_name": "Kashubian", + "autonym": "Kashubian", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "bqv", + "speakers": 
46718, + "language_name": "Koro Wachi", + "autonym": "Koro Wachi", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "ch", + "speakers": 46325, + "language_name": "Chamorro", + "autonym": "Chamorro", + "family": "Austronesian", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "cch", + "speakers": 44946, + "language_name": "Atsam", + "autonym": "Atsam", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "buc", + "speakers": 44620, + "language_name": "Bushi", + "autonym": "Bushi", + "family": "Austronesian", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "rm", + "speakers": 42020, + "language_name": "Romansh", + "autonym": "Rumantsch", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "agq", + "speakers": 38843, + "language_name": "Aghem", + "autonym": "Aghem", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "kxv", + "speakers": 38457, + "language_name": "Kuvi", + "autonym": "Kuvi", + "family": "Dravidian", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "snf", + "speakers": 37767, + "language_name": "Noon", + "autonym": "Noon", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + 
"in_benchmark": false + }, + { + "bcp_47": "fur", + "speakers": 37442, + "language_name": "Friulian", + "autonym": "Furlan", + "family": "Indo-European", + "flores_path": "fur_Latn", + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": true + }, + { + "bcp_47": "tdh", + "speakers": 36393, + "language_name": "Thulung", + "autonym": "Thulung", + "family": "Sino-Tibetan", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "egl", + "speakers": 31201, + "language_name": "Emilian", + "autonym": "Emilian", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "chk", + "speakers": 30731, + "language_name": "Chuukese", + "autonym": "Chuukese", + "family": "Austronesian", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "mrj", + "speakers": 29762, + "language_name": "Western Mari", + "autonym": "Western Mari", + "family": "Uralic", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 34.0, + "commonvoice_locale": "mrj", + "in_benchmark": false + }, + { + "bcp_47": "haw", + "speakers": 29605, + "language_name": "Hawaiian", + "autonym": "ʻŌlelo HawaiʻI", + "family": "Austronesian", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "mro", + "speakers": 29277, + "language_name": "Mru", + "autonym": "Mru", + "family": "Sino-Tibetan", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "frc", + "speakers": 27942, + "language_name": "Cajun French", + "autonym": "Cajun French", + "family": "Indo-European", + 
"flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "yrl", + "speakers": 26171, + "language_name": "Nheengatu", + "autonym": "Nheẽgatu", + "family": "Tupian", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "an", + "speakers": 26008, + "language_name": "Aragonese", + "autonym": "Aragonés", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 16.0, + "commonvoice_locale": "an", + "in_benchmark": false + }, + { + "bcp_47": "chr", + "speakers": 25613, + "language_name": "Cherokee", + "autonym": "Ꮳꮃꭹ", + "family": "Iroquoian", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "rmo", + "speakers": 24372, + "language_name": "Sinte Romani", + "autonym": "Sinte Romani", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "oj", + "speakers": 23747, + "language_name": "Ojibwa", + "autonym": "Ojibwa", + "family": "Algic", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "pon", + "speakers": 23560, + "language_name": "Pohnpeian", + "autonym": "Pohnpeian", + "family": "Austronesian", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "lzz", + "speakers": 22965, + "language_name": "Laz", + "autonym": "Laz", + "family": "Kartvelian", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 0.0, + "commonvoice_locale": "lzz", + "in_benchmark": false + }, + { + "bcp_47": "ttt", + "speakers": 22453, + "language_name": 
"Muslim Tat", + "autonym": "Muslim Tat", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "esu", + "speakers": 20956, + "language_name": "Central Yupik", + "autonym": "Central Yupik", + "family": "Eskimo-Aleut", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 7.6, + "commonvoice_locale": "esu", + "in_benchmark": false + }, + { + "bcp_47": "dak", + "speakers": 20832, + "language_name": "Dakota", + "autonym": "Dakota", + "family": "Siouan", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "alt", + "speakers": 19841, + "language_name": "Southern Altai", + "autonym": "Southern Altai", + "family": "Turkic", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "amo", + "speakers": 18620, + "language_name": "Amo", + "autonym": "Amo", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "gub", + "speakers": 17784, + "language_name": "Guajajára", + "autonym": "Guajajára", + "family": "Tupian", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "hnn", + "speakers": 17469, + "language_name": "Hanunoo", + "autonym": "Hanunoo", + "family": "Austronesian", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "tkr", + "speakers": 16329, + "language_name": "Tsakhur", + "autonym": "Tsakhur", + "family": "Nakh-Daghestanian", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + 
"in_benchmark": false + }, + { + "bcp_47": "pau", + "speakers": 16047, + "language_name": "Palauan", + "autonym": "Palauan", + "family": "Austronesian", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "sa", + "speakers": 15913, + "language_name": "Sanskrit", + "autonym": "संस्कृत भाषा", + "family": "Indo-European", + "flores_path": "san_Deva", + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": true + }, + { + "bcp_47": "bsc", + "speakers": 15264, + "language_name": "Bassari", + "autonym": "Bassari", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "ojs", + "speakers": 15078, + "language_name": "Oji-Cree", + "autonym": "Oji-Cree", + "family": "Algic", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "kht", + "speakers": 13527, + "language_name": "Khamti", + "autonym": "Khamti", + "family": "Tai-Kadai", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "hsb", + "speakers": 12826, + "language_name": "Upper Sorbian", + "autonym": "Hornjoserbšćina", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 2.9, + "commonvoice_locale": "hsb", + "in_benchmark": false + }, + { + "bcp_47": "chp", + "speakers": 12816, + "language_name": "Chipewyan", + "autonym": "Chipewyan", + "family": "Athabaskan-Eyak-Tlingit", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "moe", + "speakers": 12062, + "language_name": "Innu-aimun", + "autonym": "Innu-Aimun", + "family": "Algic", + 
"flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "sli", + "speakers": 11868, + "language_name": "Lower Silesian", + "autonym": "Lower Silesian", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "wae", + "speakers": 11377, + "language_name": "Walser", + "autonym": "Walser", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "cho", + "speakers": 10977, + "language_name": "Choctaw", + "autonym": "Chahta", + "family": "Muskogean", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "tbw", + "speakers": 10045, + "language_name": "Tagbanwa", + "autonym": "Tagbanwa", + "family": "Austronesian", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "xav", + "speakers": 9951, + "language_name": "Xavánte", + "autonym": "Xavánte", + "family": "Nuclear-Macro-Je", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "tvl", + "speakers": 9868, + "language_name": "Tuvalu", + "autonym": "Tuvalu", + "family": "Austronesian", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "frr", + "speakers": 9619, + "language_name": "Northern Frisian", + "autonym": "Nordfriisk", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "rug", + 
"speakers": 9591, + "language_name": "Roviana", + "autonym": "Roviana", + "family": "Austronesian", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "wls", + "speakers": 9512, + "language_name": "Wallisian", + "autonym": "Wallisian", + "family": "Austronesian", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "rmu", + "speakers": 9488, + "language_name": "Tavringer Romani", + "autonym": "Tavringer Romani", + "family": "Speech Register", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "cr", + "speakers": 9047, + "language_name": "Cree", + "autonym": "Cree", + "family": "Algic", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "nmg", + "speakers": 8878, + "language_name": "Kwasio", + "autonym": "Kwasio", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "lkt", + "speakers": 8316, + "language_name": "Lakota", + "autonym": "LakȟólʼIyapi", + "family": "Siouan", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "kos", + "speakers": 7990, + "language_name": "Kosraean", + "autonym": "Kosraean", + "family": "Austronesian", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "ik", + "speakers": 7983, + "language_name": "Inupiaq", + "autonym": "Inupiaq", + "family": "Eskimo-Aleut", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 7.2, + 
"commonvoice_locale": "ipk", + "in_benchmark": false + }, + { + "bcp_47": "gbz", + "speakers": 7983, + "language_name": "Zoroastrian Dari", + "autonym": "Zoroastrian Dari", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "twq", + "speakers": 7970, + "language_name": "Tasawaq", + "autonym": "Tasawaq Senni", + "family": "Songhay", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "bku", + "speakers": 7970, + "language_name": "Buhid", + "autonym": "Buhid", + "family": "Austronesian", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "mic", + "speakers": 7916, + "language_name": "Mi'kmaw", + "autonym": "LʼNuiʼSuti", + "family": "Algic", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "mey", + "speakers": 7239, + "language_name": "Hassaniyya", + "autonym": "Hassaniyya", + "family": "Afro-Asiatic", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "dsb", + "speakers": 6974, + "language_name": "Lower Sorbian", + "autonym": "Dolnoserbšćina", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 0.0, + "commonvoice_locale": "dsb", + "in_benchmark": false + }, + { + "bcp_47": "na", + "speakers": 6930, + "language_name": "Nauru", + "autonym": "Nauru", + "family": "Austronesian", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "lwl", + "speakers": 6898, + "language_name": "Eastern Lawa", + "autonym": "Eastern Lawa", + 
"family": "Austroasiatic", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "yap", + "speakers": 6556, + "language_name": "Yapese", + "autonym": "Yapese", + "family": "Austronesian", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "atj", + "speakers": 6408, + "language_name": "Atikamekw", + "autonym": "Atikamekw", + "family": "Algic", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "pms", + "speakers": 6178, + "language_name": "Piedmontese", + "autonym": "Piedmontese", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "rmf", + "speakers": 5015, + "language_name": "Kalo Finnish Romani", + "autonym": "Kalo Finnish Romani", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "bla", + "speakers": 4900, + "language_name": "Siksiká", + "autonym": "Siksiká", + "family": "Algic", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "fud", + "speakers": 4756, + "language_name": "East Futuna", + "autonym": "East Futuna", + "family": "Austronesian", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "trv", + "speakers": 4721, + "language_name": "Taroko", + "autonym": "Patas Taroko", + "family": "Austronesian", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 10.0, + "commonvoice_locale": "trv", + "in_benchmark": false + 
}, + { + "bcp_47": "crk", + "speakers": 4146, + "language_name": "Plains Cree", + "autonym": "Plains Cree", + "family": "Algic", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "saf", + "speakers": 4108, + "language_name": "Safaliba", + "autonym": "Safaliba", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "mus", + "speakers": 3992, + "language_name": "Muscogee", + "autonym": "Mvskoke", + "family": "Muskogean", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "vep", + "speakers": 3543, + "language_name": "Veps", + "autonym": "Veps", + "family": "Uralic", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "tnr", + "speakers": 3305, + "language_name": "Ménik", + "autonym": "Ménik", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "vic", + "speakers": 3113, + "language_name": "Virgin Islands Creole English", + "autonym": "Virgin Islands Creole English", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "tru", + "speakers": 3035, + "language_name": "Turoyo", + "autonym": "Turoyo", + "family": "Afro-Asiatic", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "uli", + "speakers": 2971, + "language_name": "Ulithian", + "autonym": "Ulithian", + "family": "Austronesian", + "flores_path": null, + 
"fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "rtm", + "speakers": 2527, + "language_name": "Rotuman", + "autonym": "Rotuman", + "family": "Austronesian", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "wbp", + "speakers": 2496, + "language_name": "Warlpiri", + "autonym": "Warlpiri", + "family": "Pama-Nyungan", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "yav", + "speakers": 2303, + "language_name": "Yangben", + "autonym": "Nuasue", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 0.0, + "commonvoice_locale": "yav", + "in_benchmark": false + }, + { + "bcp_47": "den", + "speakers": 2299, + "language_name": "Slave", + "autonym": "Slave", + "family": "Athabaskan-Eyak-Tlingit", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "dgr", + "speakers": 2111, + "language_name": "Dogrib", + "autonym": "Dogrib", + "family": "Athabaskan-Eyak-Tlingit", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "frs", + "speakers": 2004, + "language_name": "Eastern Frisian", + "autonym": "Eastern Frisian", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "kw", + "speakers": 1973, + "language_name": "Cornish", + "autonym": "Kernewek", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 11.0, + "commonvoice_locale": "kw", + "in_benchmark": false + }, + { + "bcp_47": "csw", + "speakers": 1809, + 
"language_name": "Swampy Cree", + "autonym": "ᓀᐦᐃᓇᐍᐏᐣ", + "family": "Algic", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "moh", + "speakers": 1772, + "language_name": "Mohawk", + "autonym": "KanienʼKéha", + "family": "Iroquoian", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "gv", + "speakers": 1719, + "language_name": "Manx", + "autonym": "Gaelg", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 11.0, + "commonvoice_locale": "gv", + "in_benchmark": false + }, + { + "bcp_47": "smj", + "speakers": 1530, + "language_name": "Lule Sami", + "autonym": "Julevsámegiella", + "family": "Uralic", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "nsk", + "speakers": 1395, + "language_name": "Naskapi", + "autonym": "Naskapi", + "family": "Algic", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "tkl", + "speakers": 1285, + "language_name": "Tokelau", + "autonym": "Tokelau", + "family": "Austronesian", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "niu", + "speakers": 1120, + "language_name": "Niuean", + "autonym": "Niuean", + "family": "Austronesian", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "stq", + "speakers": 962, + "language_name": "Saterland Frisian", + "autonym": "Saterland Frisian", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + 
"in_benchmark": false + }, + { + "bcp_47": "sei", + "speakers": 901, + "language_name": "Seri", + "autonym": "Seri", + "family": null, + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 11.0, + "commonvoice_locale": "sei", + "in_benchmark": false + }, + { + "bcp_47": "clc", + "speakers": 867, + "language_name": "Chilcotin", + "autonym": "Chilcotin", + "family": "Athabaskan-Eyak-Tlingit", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "la", + "speakers": 820, + "language_name": "Latin", + "autonym": "Lingua Latina", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "hur", + "speakers": 716, + "language_name": "Halkomelem", + "autonym": "Halkomelem", + "family": "Salishan", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "crg", + "speakers": 678, + "language_name": "Michif", + "autonym": "Michif", + "family": "Algic", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "sms", + "speakers": 613, + "language_name": "Skolt Sami", + "autonym": "SääʹMǩiõll", + "family": "Uralic", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "smn", + "speakers": 613, + "language_name": "Inari Sami", + "autonym": "Anarâškielâ", + "family": "Uralic", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "lil", + "speakers": 528, + "language_name": "Lillooet", + "autonym": "Lillooet", + "family": "Salishan", + "flores_path": null, + "fleurs_tag": null, + 
"commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "oka", + "speakers": 490, + "language_name": "Okanagan", + "autonym": "Okanagan", + "family": "Salishan", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "pqm", + "speakers": 490, + "language_name": "Maliseet-Passamaquoddy", + "autonym": "Maliseet-Passamaquoddy", + "family": "Algic", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "kwk", + "speakers": 377, + "language_name": "Kwakʼwala", + "autonym": "KwakʼWala", + "family": "Wakashan", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "crl", + "speakers": 377, + "language_name": "Northern East Cree", + "autonym": "Northern East Cree", + "family": "Algic", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "gwi", + "speakers": 302, + "language_name": "Gwichʼin", + "autonym": "GwichʼIn", + "family": "Athabaskan-Eyak-Tlingit", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "eo", + "speakers": 301, + "language_name": "Esperanto", + "autonym": "Esperanto", + "family": "Artificial Language", + "flores_path": "epo_Latn", + "fleurs_tag": null, + "commonvoice_hours": 1436.0, + "commonvoice_locale": "eo", + "in_benchmark": true + }, + { + "bcp_47": "sma", + "speakers": 296, + "language_name": "Southern Sami", + "autonym": "Åarjelsaemien Gïele", + "family": "Uralic", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "tsd", + "speakers": 
202, + "language_name": "Tsakonian", + "autonym": "Tsakonian", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "izh", + "speakers": 142, + "language_name": "Ingrian", + "autonym": "Ingrian", + "family": "Uralic", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 0.0, + "commonvoice_locale": "izh", + "in_benchmark": false + }, + { + "bcp_47": "ia", + "speakers": 136, + "language_name": "Interlingua", + "autonym": "Interlingua", + "family": "Artificial Language", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 15.0, + "commonvoice_locale": "ia", + "in_benchmark": false + }, + { + "bcp_47": "aro", + "speakers": 105, + "language_name": "Araona", + "autonym": "Araona", + "family": "Pano-Tacanan", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "prg", + "speakers": 38, + "language_name": "Prussian", + "autonym": "Prūsiskan", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "ie", + "speakers": 1, + "language_name": "Interlingue", + "autonym": "Interlingue", + "family": "Artificial Language", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 0.0, + "commonvoice_locale": "ie", + "in_benchmark": false + }, + { + "bcp_47": "lzh", + "speakers": 0, + "language_name": "Literary Chinese", + "autonym": "Literary Chinese", + "family": "Sino-Tibetan", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "io", + "speakers": 0, + "language_name": "Ido", + "autonym": "Ido", + "family": "Artificial Language", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + 
"commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "jbo", + "speakers": 0, + "language_name": "Lojban", + "autonym": "La .Lojban.", + "family": "Artificial Language", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 0.0, + "commonvoice_locale": "jbo", + "in_benchmark": false + }, + { + "bcp_47": "jut", + "speakers": 0, + "language_name": "Jutish", + "autonym": "Jutish", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "vot", + "speakers": 0, + "language_name": "Votic", + "autonym": "Votic", + "family": "Uralic", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": 0.1, + "commonvoice_locale": "vot", + "in_benchmark": false + }, + { + "bcp_47": "gez", + "speakers": 0, + "language_name": "Geez", + "autonym": "Geez", + "family": "Afro-Asiatic", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "osa", + "speakers": 0, + "language_name": "Osage", + "autonym": "𐓏𐓘𐓻𐓘𐓻𐓟", + "family": "Siouan", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "rgn", + "speakers": 0, + "language_name": "Romagnol", + "autonym": "Romagnol", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "cu", + "speakers": 0, + "language_name": "Church Slavic", + "autonym": "Church Slavic", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "sgs", + "speakers": 0, + "language_name": "Samogitian", + "autonym": "Samogitian", + "family": "Indo-European", + "flores_path": null, + 
"fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "ann", + "speakers": 0, + "language_name": "Obolo", + "autonym": "Obolo", + "family": "Atlantic-Congo", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "cic", + "speakers": 0, + "language_name": "Chickasaw", + "autonym": "Chikashshanompaʼ", + "family": "Muskogean", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "cad", + "speakers": 0, + "language_name": "Caddo", + "autonym": "Caddo", + "family": "Caddoan", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + }, + { + "bcp_47": "pfl", + "speakers": 0, + "language_name": "Palatine German", + "autonym": "Palatine German", + "family": "Indo-European", + "flores_path": null, + "fleurs_tag": null, + "commonvoice_hours": null, + "commonvoice_locale": null, + "in_benchmark": false + } + ], + "models": [ + { + "id": "openai/gpt-4o-mini", + "hf_id": null, + "creation_date": "NaT", + "size": null, + "type": "Commercial", + "license": null + }, + { + "id": "meta-llama/llama-3.3-70b-instruct", + "hf_id": "meta-llama/Llama-3.3-70B-Instruct", + "creation_date": "2024-11-26T16:08:47+00:00", + "size": 70553706496.0, + "type": "Open", + "license": "Llama3.3" + }, + { + "id": "meta-llama/llama-3.1-70b-instruct", + "hf_id": "meta-llama/Llama-3.1-70B-Instruct", + "creation_date": "2024-07-16T16:07:46+00:00", + "size": 70553706496.0, + "type": "Open", + "license": "Llama3.1" + }, + { + "id": "meta-llama/llama-3-70b-instruct", + "hf_id": null, + "creation_date": "NaT", + "size": null, + "type": "Commercial", + "license": null + }, + { + "id": "mistralai/mistral-small-24b-instruct-2501", + "hf_id": "mistralai/Mistral-Small-24B-Instruct-2501", 
+ "creation_date": "2025-01-28T13:30:13+00:00", + "size": 23572403200.0, + "type": "Open", + "license": "Apache 2.0" + }, + { + "id": "mistralai/mistral-nemo", + "hf_id": null, + "creation_date": "NaT", + "size": null, + "type": "Commercial", + "license": null + }, + { + "id": "google/gemini-2.0-flash-001", + "hf_id": null, + "creation_date": "NaT", + "size": null, + "type": "Commercial", + "license": null + }, + { + "id": "google/gemini-2.0-flash-lite-001", + "hf_id": null, + "creation_date": "NaT", + "size": null, + "type": "Commercial", + "license": null + }, + { + "id": "google/gemma-3-27b-it", + "hf_id": "google/gemma-3-27b-it", + "creation_date": "2025-03-01T19:10:19+00:00", + "size": 27432406640.0, + "type": "Open", + "license": "Gemma" + }, + { + "id": "qwen/qwq-32b", + "hf_id": "Qwen/QwQ-32B", + "creation_date": "2025-03-05T14:16:59+00:00", + "size": 32763876352.0, + "type": "Open", + "license": "Apache 2.0" + }, + { + "id": "microsoft/phi-4-multimodal-instruct", + "hf_id": "microsoft/Phi-4-multimodal-instruct", + "creation_date": "2025-02-24T22:33:32+00:00", + "size": 5574460384.0, + "type": "Open", + "license": "Mit" + }, + { + "id": "amazon/nova-micro-v1", + "hf_id": null, + "creation_date": "NaT", + "size": null, + "type": "Commercial", + "license": null + } + ], + "scores": [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5679608237702286, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.746881923400435, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.4438455475739657, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6320800718582147, + "sentence_nr": 0 + }, + { + "model": 
"meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5894973558751632, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.7562097956860054, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.3846086976522069, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5835344719191324, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.4804215535486392, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6694735319785804, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.2511517944602615, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.4484633445384819, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5820808184424484, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.73788733854976, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5749603738163459, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.7240488251574404, + "sentence_nr": 0 + }, 
+ { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5617561349997696, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.7132694856647042, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.2963216580569375, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5101500486835966, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.15317719477157257, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.38800976493585004, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.6001453932849357, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.762029391170019, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.30676942927198475, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.4968492831219663, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.32063971770635635, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5206258401513325, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + 
"task": "translation", + "metric": "bleu", + "score": 0.39086127104761287, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6239956806265569, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.3020679767949182, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5246291817407542, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.29261990846502584, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5207965578474395, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.23343658187420896, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5188968707275573, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.2920008662633279, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.47119207959541226, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.2596939072050362, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.4394574387008692, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": 
"zh", + "task": "translation", + "metric": "bleu", + "score": 0.4273817965049865, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6016204186733703, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.2777551012631926, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.49423240120783246, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.7964573357809173, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.8458636471716781, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.34633672321253084, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.5378805625051344, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.3582301850807646, + 
"sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.5380305837807603, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.300740577257699, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.5272774705181614, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.3099603853356145, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.5209233176748354, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.35580399268816465, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.5392592206305507, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.39317381456022266, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.6026058740561834, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.48930936408255293, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.699085629239476, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": 
"bleu", + "score": 0.3963410285961713, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.613166190285915, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.44294247711132617, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.5915660675216782, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.3756985486608933, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.5991443770283833, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.5009456904181451, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.6893719644090858, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.18273944860385094, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.44261865187418153, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.2153742037697241, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.4581737688885401, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": 
"mr", + "task": "translation", + "metric": "bleu", + "score": 0.3372953649368346, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.5482505380106469, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.28528905353056333, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.4885812318466243, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.2935204022158406, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.4867597973247361, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.2929684584911775, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.5038324436049059, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.4034224234291925, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.5736798834726872, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.1077205146963877, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.428338145564396, + "sentence_nr": 0 + }, + { + "model": 
"meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.22327767951697297, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.4063556880747369, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.2572733200413211, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.4520014138562526, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.40311197004738203, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.5788525108956781, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.45313578977486535, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.6160993561903745, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.2651736858432996, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.4491383344282561, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.34545319957597864, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 
0.5727052860304503, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.15815751066481462, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5152611872266766, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.12903696060775005, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.456225988032654, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.024459391267874976, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.12351824822447692, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.46822754470803873, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.3465147345201782, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.08516700886866406, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.4091252890943268, + 
"sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.19194937906573872, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5477665664300843, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.4370196290761142, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.20669086265781264, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5076721272198604, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.17630490037560695, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.48116430160978857, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.4122750002638689, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.15412719160788987, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5010353699512481, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": 
"translation", + "metric": "bleu", + "score": 0.12369892692249995, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.44549610902403686, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.12560672881768975, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.4969560260291519, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.17077058518804336, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5022008374701596, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.10784756064735967, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.4427230465401631, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.06735571462439276, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.38102852892512806, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.42723260976616784, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + 
"task": "translation", + "metric": "bleu", + "score": 0.1694466724647263, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.4902502031746037, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.3532931581623198, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.175396614619324, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.49736499605529066, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.15154395847232716, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.46053919348995803, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.4041678259311437, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.1290514243115152, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.4766581477336301, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.08273178236238297, + 
"sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.36399666460809255, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.12601482779921785, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.43595665254608706, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.40959087443621306, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.6348509381122925, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.08214106568089705, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.3969463877642616, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.0744904632040495, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.4111163205685468, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.12894104034845807, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.4486368934849452, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + 
"metric": "bleu", + "score": 0.10070927557742705, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.43718220262892105, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.0772718393063023, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.4203683137304257, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.0756907193511249, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.4138725093679467, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.21748353646757182, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.4462746462826943, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.4179644538349004, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.10505106462290037, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.4474870048911137, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": 
"ja", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.0009218289085545725, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.15653859793617866, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.43177798053127925, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.0891537192318598, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.3970634926176537, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.0950136506275681, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.4372017487229785, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.1259356760989446, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.44568274520971096, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.16322494183480127, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.4815584993817062, + "sentence_nr": 1 + }, + { + "model": 
"meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.0904087252785689, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.41830513174690515, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.21351902664706998, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.5130443042033361, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.16269986423611488, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.06939838145153245, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.3371547585108182, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.1691386174483793, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.4920789340026317, + "sentence_nr": 1 + }, + { + 
"model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.14944432524273302, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.4972796478830659, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.09793316925795417, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.4297577431879659, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.38870674200492367, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6484380084879691, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.4923751299732868, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6853756490381199, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.3996712647649035, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6353525755760105, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5115346945020283, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.7037574715738644, + "sentence_nr": 2 
+ }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.017834618169115152, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.05927156798818119, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.23904922011090457, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.3399292774084129, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.6152980280400979, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8311281590297233, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.005449161724399305, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.026158029267484995, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.24508104771894088, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5725552336126134, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + 
"bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.20801258614305904, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.26703508536995574, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.35315040956049437, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.625895188503691, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.11133996756497437, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.4410280353998367, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.17743299460161885, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.43071271897416463, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.16052654068024738, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.41580120868053494, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.05963579607071745, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.31139762378406344, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + 
"bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.006734847287559362, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.03408121951468736, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.09880177230676102, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.3297638349619511, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.2377604053257556, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5662768009060447, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.22573408807826306, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5444672928195973, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.10742716472890976, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.42694859148910824, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, 
+ "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.14745870033404418, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.475170637938921, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.21665407194210906, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.4344921442639243, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.37994652561206577, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.6464467277069994, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.09362261118571368, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.3452056942265759, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.18917620656425485, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.4346170232980484, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 
0.420450507904553, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.6503146347305717, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.24894072982768842, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.5212235893093335, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.393613605227227, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.6492198447661237, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.21147734744561483, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.41020178654369294, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.2329856851831642, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.5405751250637106, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.41756686236967944, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.5616829345739638, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": 
"translation", + "metric": "bleu", + "score": 0.38189567401226293, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.6154314825900052, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.2126707920684064, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.4659908460634765, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.23240102389974368, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.4973274282641141, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.17979384730979156, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.4177311931467539, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.1702602472176709, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.4366640707779677, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.933651069586263, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.9586507529693243, + "sentence_nr": 2 + }, + { + "model": 
"meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.3816408219023713, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.5784105768028126, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.18398226639192106, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.37285010531146734, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.26958884543190903, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.5631664732610485, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.4005296397635166, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.6201785376974677, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.15956483578595942, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.425693420655628, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.2323385180696658, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 
0.5019509292309764, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.22952177306405494, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.5279520952576137, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.3618488169166299, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.5708179622131996, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.1712766252338756, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.5225554962608486, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.2709079038456153, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.447458019441992, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.38249626297768063, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.40976234193505356, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5806197937310393, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": 
"chrf", + "score": 0.7346706700987636, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5793367580502561, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6502428441722727, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.4855332614117322, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5299556742893647, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.19940445989088915, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.43164821827950184, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.2423441824135159, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.4429509373913047, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.6242817472465665, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.7056438934239434, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.6064630666233242, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": 
"translation", + "metric": "chrf", + "score": 0.6752055521830945, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5357110024227318, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6365941772753647, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.14790264259417688, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.27159767590045303, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.4751132438608344, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6849386986272349, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.08635800047213174, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.218109371254876, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.23386786214190372, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.3682311523733465, + "sentence_nr": 3 + }, + { + "model": 
"meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.11739521786077453, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.22090491782919655, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.1892240568795935, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.280413108453108, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.11547518641061649, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.25945846414490087, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.20233074088759792, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.3746629492952356, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.40214612768560637, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.45128424593135114, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.37284875432797243, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.44888401040760956, + 
"sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0925329498915617, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.2110486160692096, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.12453389344594705, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.141543757252386, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.2594145364221844, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.6244631487487835, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.6931369519059803, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.25383339228798274, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.45896379476820603, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", 
+ "task": "translation", + "metric": "bleu", + "score": 0.17200767571780612, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.3723150838362789, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.1477219991186121, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.28685201698226354, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.3254455687469726, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.4474512036484817, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.23887527917609022, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.4120359948636439, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.580451128369423, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.728208634600343, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + 
"bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.3556521383601747, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.594830811413066, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.21629114799587432, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.3542320138389837, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.580451128369423, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.728208634600343, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.27405612859390877, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.4639958592456083, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.13004800471424346, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.28217142159025543, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.37821486365532614, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.4718665834023439, + "sentence_nr": 3 + }, + { + 
"model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.3699382260470039, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.4032851361478274, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.45167594566243024, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.5169677927619225, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.23386786214190372, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.3780009826926042, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.3925121365052661, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.47788592802001717, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.1423412184218882, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.2596718628394258, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.3572188192648703, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 
0.45381175288762937, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.07425055521504613, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.18122341046764998, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.1978585723043446, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.3527599187160617, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.2523019529343173, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.4406369072888057, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.41072675483179805, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.5635589150380774, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.3883375900135818, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.4643731845106876, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.6242817472465665, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + 
"task": "translation", + "metric": "chrf", + "score": 0.7123666275414222, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.8003203203844999, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.9453478043428296, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.2246029757863831, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5773502691896258, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.7999099314029202, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6417603075499863, + 
"sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7825422900366437, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8503171627677965, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.37709297891717664, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6881502501430368, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.8003203203844999, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.9453478043428296, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + 
"sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.629039349740581, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.629039349740581, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.629039349740581, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5581982021478125, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.629039349740581, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.652013511062815, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", 
+ "task": "translation", + "metric": "chrf", + "score": 0.629039349740581, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.629039349740581, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.629039349740581, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.629039349740581, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.629039349740581, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.629039349740581, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.5881561248602009, + "sentence_nr": 4 + }, + { + "model": 
"meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.40435987083533204, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.39858613265631837, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.4425973012069069, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.47160616105623426, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.5309982646782259, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.1892240568795935, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.6151179643430991, + "sentence_nr": 4 + }, + { + "model": 
"meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.41238100267720657, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.39909989628767284, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.17181529671327242, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.5293474685884572, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.4429196299668147, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.3830425592586042, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.1667955161379731, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.5802683403568892, + "sentence_nr": 4 + }, + { + "model": 
"meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.3237722713145643, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.7426638026175545, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.49342175914364256, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.4352628824108997, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.5116862201536014, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.33471616336068044, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.2865612242047131, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.6433813179203622, + "sentence_nr": 4 + }, + { + "model": 
"meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.3598792258309727, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.5125809225356253, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.5539920925426138, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.5226572946586268, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.5073395824633415, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.29382595610734974, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + 
"bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.1667955161379731, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.5773664661124461, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7013062757071812, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.9303769449292738, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.2381658499765768, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.8492326635760689, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.9063898435384111, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + 
"score": 0.5309354663044072, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6756014232714684, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.4529852871970908, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6941474239078328, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.8522456714074852, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.9096914044088521, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.9457416090031758, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": 
"chrf", + "score": 0.9892952933418456, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.47410002229034043, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7538467008030766, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.5087473540251254, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7647955332172516, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.5087473540251254, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7647955332172516, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.47410002229034043, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7538467008030766, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.4234885228074744, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7410180114887145, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.47375069012411286, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + 
"metric": "chrf", + "score": 0.7543919667018285, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.5738396574789242, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.798357133373606, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.5738396574789242, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.798357133373606, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.47410002229034043, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7538467008030766, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.47410002229034043, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7538467008030766, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.4234885228074744, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7410180114887145, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.47375069012411286, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7543919667018285, + 
"sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.5091224918749461, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.7829685247145245, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.40276720463657734, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.6529271690805427, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.30188353873287377, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.6086565367747951, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.6026286934891149, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.8025775976044891, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.4596980088392874, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.713787745993602, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.3272712268138726, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": 
"chrf", + "score": 0.6272846474183881, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.2981792160679168, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.5788026000794341, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.7012294787544179, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.8478115719875968, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.21690365808279138, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.5384773678665918, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.25711386542134795, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.6088853751738869, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.5695988432761473, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.7516103467926585, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.5072784644062104, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": 
"de", + "task": "translation", + "metric": "chrf", + "score": 0.7361065921505279, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.38091370416670794, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.6438225861756911, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.5091224918749461, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.7202697992734389, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.6917901740466924, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.8479928839177578, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.5309354663044072, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.6990707992725005, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.1673872929477023, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.4506667273103674, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.6917901740466924, + "sentence_nr": 5 + }, + { + "model": 
"meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.8479928839177578, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.3416581331218724, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.6578570934289981, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.4797543511401896, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.7240781310560407, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.6401876410870359, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.7526484951226097, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.33057129676705455, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.5669225664686625, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.6004981752197522, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.7697646564917222, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 
0.8492326635760689, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.9027320255916917, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.30350690419450826, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.569133886912883, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.6834516951654327, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": 
"en", + "task": "translation", + "metric": "chrf", + "score": 0.3263040636562357, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7511573912724299, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.9453473543978153, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5971070986250356, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8874294965619517, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7511573912724299, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.9453473543978153, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7511573912724299, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.9453473543978153, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7511573912724299, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.9453473543978153, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7511573912724299, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": 
"chrf", + "score": 0.9453473543978153, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7511573912724299, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.9453473543978153, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.19464521962073492, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5838790966762375, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.3142665434344143, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6466526067220029, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.3751840463233443, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6279894552667558, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.19268479640608693, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.551397074868541, + 
"sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.17470942957770763, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5403400891349619, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.19464521962073492, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5763410052067085, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.37392149096896676, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6665214662145853, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.19464521962073492, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5838790966762375, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.19464521962073492, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5763410052067085, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5460240376042262, + "sentence_nr": 6 + }, + { + "model": 
"microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.24343304284910333, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6275577931282961, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.28571962561926445, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6431872581462166, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.6255340042200862, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.8724783049357475, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.48994561421713123, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.7411155087367244, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.5971070986250356, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.8874294965619517, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.4547900039222725, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.6541971428810075, + "sentence_nr": 
6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.23198210427894825, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.630711601223299, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.4831233610237384, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.7122562458056777, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.32263864160302524, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.6824395076981005, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.38305978177479755, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.6061131723054572, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.24047860794644352, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.58198979036704, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.7511573912724299, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + 
"score": 0.9453473543978153, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.42984824697674956, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.7289444696770301, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.4881010344921759, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.7317734491561229, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.5971070986250356, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.8874294965619517, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.3684981984538114, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.5606332518476288, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.4536404448264584, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.8020827133708689, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.4545091839935173, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + 
"task": "translation", + "metric": "chrf", + "score": 0.7166050399790445, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.3370129264673147, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.7096874943799061, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.1624355752882384, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.4952968469712617, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.4831233610237384, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.7807505267551733, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.5595205105615875, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.8322210048001876, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.3142665434344143, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.6466526067220029, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.3610544299180199, + "sentence_nr": 6 + }, + { + "model": 
"meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.49125115898082056, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.4284945090100314, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.7164026439677106, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.8578928092681435, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.9422733087334002, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.42818224355402373, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.42105372680687736, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.7001171094008295, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.1582866049832572, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.34487142413575794, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.15521606028436608, + "sentence_nr": 7 + }, + { + 
"model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.37645329404497957, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.12620429887108936, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.35580703793872603, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.12872220631084524, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.33602633953270183, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.042121062429802174, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.14281404499176092, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.042575418285137674, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.05173688961049459, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.3045613775157565, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5275070803493389, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.2734283774929853, + 
"sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5252214120598302, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.10203846572325131, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.33381153680096753, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.014935758919429663, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.08106107745254391, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.044304867337633724, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.20806974344498103, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.08860973467526746, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.3178004360288637, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.15268019045355535, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + 
"task": "translation", + "metric": "chrf", + "score": 0.41028757620299977, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.030860166165309233, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.1100250143829584, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.21255327712152144, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.43272151570555034, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.01486609147288197, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.13893773605583024, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.06609667473412645, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.26197209338359717, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.26064517697298795, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5092206110218525, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.1507980395794452, + "sentence_nr": 7 + }, + { + "model": 
"google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.4306039128585424, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.1438459189500836, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.30693371625402605, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0979038733644086, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.30211704738953993, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.009624974244068071, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.07318255686027669, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.043420474648595074, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.2884095690753619, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.13868172938464635, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.3094469764260441, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.11091252683001185, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + 
"bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.26607634610445896, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.18154954789336694, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.4557483776072868, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.1381751568911733, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.3121557499162649, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.20065115069964384, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.4084885616013531, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.12291219097556666, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.3448002180666873, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.20608572305725564, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.4704943905570542, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.085416483900781, + "sentence_nr": 7 + }, + { + 
"model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.2825804066750608, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.11452508920842025, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.3212742401272785, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.15478222669012726, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.3550584759508654, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.07875433150726119, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.2638954513805452, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.10734088848154077, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.33946796348247366, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.17795920517030017, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.41862955401967455, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 
0.19388048412249795, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.44361702376789247, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.1237012344369667, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.33331866832253354, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.15589802574348086, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.37894206802233305, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.1948502778967486, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.35525815981538433, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.1618333627385132, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.3458746996740858, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.17393111207515277, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.39042812195808824, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + 
"task": "translation", + "metric": "bleu", + "score": 0.19064689695123957, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.36954921822756504, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.1785851272602057, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.3800733399524004, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.20113943179758872, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.5054929215592371, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.07088281524771703, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.1725752257112697, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.11901413329120636, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.2908877283991857, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.15593857496482408, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.3832822126692406, + "sentence_nr": 7 + }, + { + "model": 
"meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.21107720643690867, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.43911506176829573, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.13410301071131794, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.3942932268034351, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.30677064886592076, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5308555945242818, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.1327526847508867, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.37850602486495205, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.18405035438430847, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.4142901090120915, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.061826017721563604, + "sentence_nr": 8 + }, 
+ { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.08852681798207009, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.3583179111355935, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.3857436691295343, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5750224388123065, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5920893212447781, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6925021521158101, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.22478613858269392, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.44348101018104913, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.183687049781416, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", 
+ "score": 0.351911486970854, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5181825846579515, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.17328174803055044, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.3178268797869574, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.286608441075188, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.4579283646292802, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.25861130592298187, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.39452644092432093, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.20379250618355427, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.41085414309816914, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.310679343206099, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.4471183729584148, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", 
+ "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.2334787866969297, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.3621517589760531, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5866873582151947, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.46269559069048716, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.46872641361415845, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.10434360980785336, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.3012789660952507, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.13835317113453516, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.16343842313572918, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + 
"task": "translation", + "metric": "chrf", + "score": 0.3986641525285075, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.30890092021323623, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.5553909583113487, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.1690979933029136, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.3751861276375209, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.2840563956846642, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.5110250591004448, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.19920413481788912, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.42537796926163113, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.20401796878756984, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.43317630453631556, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.2044887070217883, + "sentence_nr": 8 + }, + { + "model": 
"meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.38471585132587544, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.2980504190448601, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.5101268920225042, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.9436043261706615, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.9880191679951993, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.18831933500600306, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.4318025704181776, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.21544027588567594, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.5040038440508637, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.26970223719007375, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.5172978597562362, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 
0.30630098078522544, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.5439056051092116, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.19850842371858787, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.43584341835040474, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.20170335119323748, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.3541251997977811, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.10508106635796587, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.3182774828667731, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.2309552734743087, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.43975656978777905, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.23530033724858213, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.46208607300298377, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + 
"task": "translation", + "metric": "bleu", + "score": 0.37284027455688556, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.5528347504734102, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.2887308472548599, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.38846174119508314, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.15487293534817623, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.39293494862736383, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.21741853044139284, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.3535910166292039, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.33626819961829335, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.5466581859383387, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.32000331642122953, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.5480591855923784, + "sentence_nr": 8 + }, + { + "model": 
"meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.21132630077912357, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.4175670766052166, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.13108369255325433, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.3929302741911199, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.20174045447955946, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.33729298835089516, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.20972571494011877, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.395894071208527, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.35369375385786006, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.13087682931309413, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.19462952976787054, + "sentence_nr": 9 + }, + { + 
"model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.013538497707846785, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.1570208067577934, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.4113045280468524, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.15082713742973322, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.3965911699770542, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.15471428129658016, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.4580211317461481, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.18928475425929295, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.4916060435820526, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.21940429389247643, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.4343280866601455, + "sentence_nr": 9 + }, + { + "model": 
"qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.1824401863423467, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.36709433185688595, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.3377854698776805, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.521201229892482, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.12475846123062707, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.27823340731817514, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.10759927692349745, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.21065794536310511, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.07843772989359644, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.1324578891826276, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + 
"metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.08163977068875294, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.09047502044256338, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.21669141850731985, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.10322985794794913, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.24491122482530842, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.11809057094812304, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.27930342777387007, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.21268444697113978, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.3229997133764549, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.1475503033983142, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.22104108935973044, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 
0.16434349396840395, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.28582614857210975, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.10085167559661873, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.23831215045289575, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.17543744527808774, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.28201016956553354, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.12274092982883021, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.3385513651938691, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.31017716089889963, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.21688283061839067, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.41775824162589076, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 
0.12162779391619735, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.3228288840559658, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.18237599479708327, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.3740403511567824, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.12876689524369925, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.3253153379449275, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.119159749312327, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.21297942664093145, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.2036348471340078, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.3472831655579266, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.24362353508932386, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + 
"metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.28135849152758385, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.14482189302397735, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.2913876815877049, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.16306957103469613, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.9199349282509897, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.28112283847231073, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.1308613527030366, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.3063146286877558, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": 
"bleu", + "score": 0.12787395553510186, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.21931515993565381, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.1441966459257424, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.14957316612525498, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.27675048474641756, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.3780460244391623, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.12503614625842938, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.20624064341134082, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.3368893372278425, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + 
"metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.2961559727627133, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.12846497020051437, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.2670865602673704, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.28252374116432993, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.3549531183419122, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.26128489301072644, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.2126837065505244, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 10 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 10 + 
}, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.07149097424598219, + "sentence_nr": 10 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 10 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7774075575820374, + "sentence_nr": 10 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8943538262827356, + "sentence_nr": 10 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 10 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.18639667871924825, + "sentence_nr": 10 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": 
"translation", + "metric": "chrf", + "score": 0.4540232715517938, + "sentence_nr": 10 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.8431643718744966, + "sentence_nr": 10 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.9341410275694613, + "sentence_nr": 10 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 10 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 10 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 10 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.47095916883357913, + "sentence_nr": 10 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.701526330557871, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.38260294162784475, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6692418584049541, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": 
"zh", + "task": "translation", + "metric": "bleu", + "score": 0.4093629115744712, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6243156092220487, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.36703839483583006, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6725357332891145, + "sentence_nr": 10 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.4322450379367835, + "sentence_nr": 10 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.697398762810304, + "sentence_nr": 10 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.41122010762096617, + "sentence_nr": 10 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6697492221087861, + "sentence_nr": 10 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.41126318495820946, + "sentence_nr": 10 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7254294465493162, + "sentence_nr": 10 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.4682601513034942, + "sentence_nr": 10 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.691130012325589, + "sentence_nr": 10 + }, + { + "model": 
"google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.35334199245807973, + "sentence_nr": 10 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6858610070406853, + "sentence_nr": 10 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 10 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.33061666631099795, + "sentence_nr": 10 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5343307680770133, + "sentence_nr": 10 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.38981415389445495, + "sentence_nr": 10 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.665622189515994, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.3223937524276847, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.6719135382778884, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.4466645979681496, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.714247354760266, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": 
"translation", + "metric": "bleu", + "score": 0.6233091888805312, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.7757111039890131, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.33414322499224436, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.7159580680193959, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.6620694102966999, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.7893416551805176, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.26540383860058264, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.51610805930355, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.49335830881778164, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.7240615166053675, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.4024279293206815, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.6798070651801875, + "sentence_nr": 10 + }, + { + "model": 
"meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.6153147385756811, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.8160952378322835, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.6838493012537611, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.8178509424142287, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.5169198985488462, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.7879691803533485, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.5223010192696725, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.7442134884509299, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.3885151883045163, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.6763151870864087, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.5985488590218004, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 
0.8248561222494313, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.37163791993879014, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.6792432753943116, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.5152630372775983, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.7696821316655393, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.43521980294891405, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.7204319998551938, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.46417187236805535, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.6653227698984816, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.519124054532681, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.7733428788002137, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.5083170211670072, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + 
"task": "translation", + "metric": "chrf", + "score": 0.755952798269267, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.3161432307247198, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.5990810117425377, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.40980949787910764, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.7145653936496129, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.5770135999436572, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.7697316849447288, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.7030214416074754, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.8357829168322639, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.49199339399396913, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.713934780293142, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.5002824356846001, + "sentence_nr": 10 + }, + { + 
"model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.7029341279811726, + "sentence_nr": 10 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.029124970213905314, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.1779610499753793, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.05989397907532586, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.13539167567510446, + "sentence_nr": 11 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.03073685498855941, + "sentence_nr": 11 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + 
"task": "translation", + "metric": "chrf", + "score": 0.08933758530290428, + "sentence_nr": 11 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.21051269871304829, + "sentence_nr": 11 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.18854722085547196, + "sentence_nr": 11 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.1387123733773652, + "sentence_nr": 11 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 11 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.05499461839884487, + "sentence_nr": 11 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.19978068293555115, + "sentence_nr": 11 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": 
"zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.1388011701223677, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.1460389336009171, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.038796252164058714, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.1756002877791377, + "sentence_nr": 11 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.0029868578255675027, + "sentence_nr": 11 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.015380253532528225, + "sentence_nr": 11 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 
0.0, + "sentence_nr": 11 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.19065171436703615, + "sentence_nr": 11 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.21083781655774478, + "sentence_nr": 11 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.14590438247348272, + "sentence_nr": 11 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 11 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.04379419293412465, + "sentence_nr": 11 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.15119622228734425, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", 
+ "score": 0.21315318926996712, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.16991425356152365, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.22371589981083434, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.20982178138488494, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.20189358781069322, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.20261685251676126, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.226729844497646, + 
"sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.18184342512086546, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.2185121523322681, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.17386106914161167, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.08272059515141832, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.1814025725787457, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.23945930551153607, + "sentence_nr": 11 + }, + { + 
"model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.20815933215961574, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.09886053260067004, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.14345644530149382, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.19097844728039898, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.08246021416977749, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.18868639139421345, + "sentence_nr": 11 + }, + { + "model": 
"meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.20665565461558383, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.17764901410543646, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.19312651305380893, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.21371557282714232, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.18854043679878274, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.19559831357902827, + "sentence_nr": 11 + }, + { + "model": 
"meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.1914895496057553, + "sentence_nr": 11 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.6666935927206881, + "sentence_nr": 12 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.7957561291403441, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.34999116613463505, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6356075517191035, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.48649824146709, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6763447333054696, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.367622917844187, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5615050712672139, + "sentence_nr": 12 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.4081538556642202, + "sentence_nr": 12 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.46386216052527535, + "sentence_nr": 12 + 
}, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.4300174433641992, + "sentence_nr": 12 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5099800158255156, + "sentence_nr": 12 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7963205130973803, + "sentence_nr": 12 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8101688749569373, + "sentence_nr": 12 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.6570128212612868, + "sentence_nr": 12 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6262090565616182, + "sentence_nr": 12 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5866943184579982, + "sentence_nr": 12 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6390393619950272, + "sentence_nr": 12 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 12 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 12 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 12 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.01047222192173988, + "sentence_nr": 12 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + 
"metric": "bleu", + "score": 0.5683565265173782, + "sentence_nr": 12 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.7072367582469653, + "sentence_nr": 12 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 12 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.20287366424876002, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5368464080033196, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.5198707241967666, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6993305416237223, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.36603776814499195, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.45532918164901276, + "sentence_nr": 12 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 12 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.13525036115537795, + "sentence_nr": 12 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 
0.3120848453730729, + "sentence_nr": 12 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.3474347870952493, + "sentence_nr": 12 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.7073395735740273, + "sentence_nr": 12 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6577952971578602, + "sentence_nr": 12 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.6570128212612868, + "sentence_nr": 12 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6221526807313811, + "sentence_nr": 12 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.5460462259563637, + "sentence_nr": 12 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6641829079106271, + "sentence_nr": 12 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 12 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.04884431803904408, + "sentence_nr": 12 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 12 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.18357384275951122, + "sentence_nr": 12 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.28073304156067924, + "sentence_nr": 12 + }, + { + "model": 
"amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.360657984953223, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.46365764298816153, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.5757521453586436, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.3147715014841853, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.5986154863155839, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.3885646234110734, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.5051669760132699, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.464413403675355, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.6291656356697347, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.30490938758882236, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.579088460457721, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.3758073513458154, + 
"sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.5302950018189692, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.29308025637967977, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.5715200997140051, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.43285599641891276, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.5551678521355665, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.25984882476296983, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.6305744214119023, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.48649824146709, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.7255446918266525, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.464413403675355, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.6853183317800515, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", 
+ "metric": "bleu", + "score": 0.34999116613463505, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.6356075517191035, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.4426623526629488, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.612058732370435, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.5522004843736675, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.6166558670381421, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.37954187220913477, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.5550325994532472, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.3147715014841853, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.521228891025682, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.3964513253420688, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.6095420129111676, + "sentence_nr": 12 + }, + { + "model": 
"meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.36033217429111203, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.5550014071110869, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.33403925633579773, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.5753930328058733, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.44882520213790794, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.5856175239899348, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.42760828727369016, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.6065010489098535, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.33403925633579773, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.5915394296427854, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.3212785834179169, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 
0.6158121620368939, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.1751489536280261, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.378593296276962, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.3214110553053944, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.49232390716994445, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.479033905070678, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.5975149526416976, + "sentence_nr": 12 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.13150403915662862, + "sentence_nr": 13 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.21177549089429396, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.1424915360855107, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.23985076149753726, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.13309638637723345, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + 
"metric": "chrf", + "score": 0.18696197122203645, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.12256515595630638, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.23303109995893123, + "sentence_nr": 13 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.1419886619859991, + "sentence_nr": 13 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.24113733359485448, + "sentence_nr": 13 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.1324448705928064, + "sentence_nr": 13 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.22863839042697148, + "sentence_nr": 13 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.12017886776600228, + "sentence_nr": 13 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.20794486026487116, + "sentence_nr": 13 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.1164257728844972, + "sentence_nr": 13 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.19249901344360867, + "sentence_nr": 13 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.12325384013681445, + "sentence_nr": 13 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": 
"translation", + "metric": "chrf", + "score": 0.1960232617116645, + "sentence_nr": 13 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.12806473847444227, + "sentence_nr": 13 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.20054688779645718, + "sentence_nr": 13 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.1345714227066951, + "sentence_nr": 13 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.21078968525268058, + "sentence_nr": 13 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.1802615495980454, + "sentence_nr": 13 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.19630112442374525, + "sentence_nr": 13 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.8212614342207556, + "sentence_nr": 13 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7876222308170935, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.5888582552569348, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7876222308170935, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.5888582552569348, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + 
"score": 0.7876222308170935, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.6871546336787117, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 13 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.6871546336787117, + "sentence_nr": 13 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 13 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.43550490048931545, + "sentence_nr": 13 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6419345531187637, + "sentence_nr": 13 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.17539593635425982, + "sentence_nr": 13 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.3139104155809725, + "sentence_nr": 13 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.39225487001250453, + "sentence_nr": 13 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5189967318357492, + "sentence_nr": 13 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.12859070457371286, + "sentence_nr": 13 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 
0.22162336097079333, + "sentence_nr": 13 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.6871546336787117, + "sentence_nr": 13 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 13 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.6258765997974801, + "sentence_nr": 13 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6680248455809015, + "sentence_nr": 13 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.6258765997974801, + "sentence_nr": 13 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6722124517361844, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.17023327167529265, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.2521455524828544, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.13150403915662862, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.2229548791980166, + "sentence_nr": 13 + }, + { + 
"model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.15247670030930355, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.1324448705928064, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.23382021475411732, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.09766807787022613, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.16788063248730647, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.12111615182138995, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.21505717177216926, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.09979796185764318, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.1310501345458609, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.11512937599552589, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 
0.1852451960926282, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.126642985054506, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.20913543330915318, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.12632059501697884, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.22490978846607526, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.1352612651586241, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.22176710342008016, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.18982400330057914, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.11760179026027952, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.19531596229980544, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.09968269909242322, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": 
"translation", + "metric": "chrf", + "score": 0.14510210137368384, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.1204925245474865, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.12192273449574796, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.18177358407861108, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.16841504132177978, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.10667790151233097, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.17427579502643556, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.1508875367739971, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.20889434105456664, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.13184959768302618, + "sentence_nr": 13 + }, + { + "model": 
"meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.30505662513933907, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.09878901581794378, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.1651800705978423, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.13150403915662862, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.20736628090200235, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.11824658049755846, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.2047497542808756, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.1461072488843534, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.1946917085815184, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.1018151014848322, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.14524830913329922, + "sentence_nr": 13 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 
0.2589080403198245, + "sentence_nr": 14 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.2189767496390278, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.09761931247072746, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.1397102655312677, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.1326689502117876, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 14 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.167569694983793, + "sentence_nr": 14 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.15848968577272604, + "sentence_nr": 14 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.24447662789322752, + "sentence_nr": 14 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.20665940380705064, + "sentence_nr": 14 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.18243716955007858, 
+ "sentence_nr": 14 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.16168125580314086, + "sentence_nr": 14 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.2450013599045987, + "sentence_nr": 14 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.20901732384345645, + "sentence_nr": 14 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.20222677481313764, + "sentence_nr": 14 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.18492694642397273, + "sentence_nr": 14 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.18243716955007863, + "sentence_nr": 14 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.16667457585564618, + "sentence_nr": 14 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 14 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.08556679632324991, + "sentence_nr": 14 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.1575852366903021, + "sentence_nr": 14 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.1474874322154398, + "sentence_nr": 14 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 14 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + 
"task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.9202663016973823, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.9263876898254182, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.8621431910551439, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.8363304387269249, + "sentence_nr": 14 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.9419492177147062, + "sentence_nr": 14 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.9202237383102091, + "sentence_nr": 14 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.6656058483395763, + "sentence_nr": 14 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6306557167105028, + "sentence_nr": 14 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.8657947138469048, + "sentence_nr": 14 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": 
"translation", + "metric": "chrf", + "score": 0.8367521498141209, + "sentence_nr": 14 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.9419492177147062, + "sentence_nr": 14 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.9202237383102091, + "sentence_nr": 14 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.6993348038140574, + "sentence_nr": 14 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6335836519040372, + "sentence_nr": 14 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 14 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 14 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 14 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.9419492177147062, + "sentence_nr": 14 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.9202237383102091, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.11064738383914807, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.12449466772796605, + "sentence_nr": 14 + }, + { + 
"model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.12222372495044852, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.12383047729216191, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.1392580908972882, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.1333265070823728, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.07717159074475938, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.09413026539458375, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.16807498532991816, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.16404257857373192, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.21005284223037346, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.1679703861465872, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.0951509584925814, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + 
"score": 0.12014553061064691, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.11737915185320068, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.10085050674562507, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.11377195287577829, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.1301681094143453, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.09455636771034115, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.11463120929696417, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.1544787887603271, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.1384236976807813, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.11488572123868507, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.1455973492295447, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + 
"task": "translation", + "metric": "chrf", + "score": 0.13735441291745387, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.20255423961944058, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.205408273869532, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.11470196605012067, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.0960438892364715, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.07184436307032757, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.20378989148152887, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.16337212771611656, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.09669863605676213, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.10886215421099144, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.18171364159867548, + "sentence_nr": 14 + }, + { + "model": 
"meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.16245793974098002, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.07562263205281951, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.09819928715831736, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.1430606569063152, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.128073928655324, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.09526781380423786, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.18223449608285797, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.17127401148639734, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.09855718610544388, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.09669863605676213, + 
"sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.11679541132562438, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.09643517424337235, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.1226126790254367, + "sentence_nr": 14 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.3969253441303859, + "sentence_nr": 15 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.43277080710930865, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.26887073704667247, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.2918476164856665, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5183146371291372, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5942793492554739, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.028864519535915668, + "sentence_nr": 15 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 
0.13535086012687783, + "sentence_nr": 15 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.29687399422087424, + "sentence_nr": 15 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.002376388269368755, + "sentence_nr": 15 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.04574695485583133, + "sentence_nr": 15 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.2703094106380642, + "sentence_nr": 15 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.2982249908859, + "sentence_nr": 15 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.29313061087267483, + "sentence_nr": 15 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.30295384730328956, + "sentence_nr": 15 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.284911205299835, + "sentence_nr": 15 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.32067889250923776, + "sentence_nr": 15 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.29353055611145706, + "sentence_nr": 15 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.3381266475327612, + "sentence_nr": 15 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.09910529437987022, + 
"sentence_nr": 15 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.2511990291834263, + "sentence_nr": 15 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.295394335805579, + "sentence_nr": 15 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.362515947701148, + "sentence_nr": 15 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.7243776840931383, + "sentence_nr": 15 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.8980107630353439, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.9325718821645923, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.9490053815176721, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.6653044831075519, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7986980418662383, + "sentence_nr": 15 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.8504591592783618, + "sentence_nr": 15 + }, + { + 
"model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.8980107630353439, + "sentence_nr": 15 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.5950322600507224, + "sentence_nr": 15 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7090542316843602, + "sentence_nr": 15 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.44768974737795825, + "sentence_nr": 15 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.45520472994232203, + "sentence_nr": 15 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.6509298345623671, + "sentence_nr": 15 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7962234681835563, + "sentence_nr": 15 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.41813929088914065, + "sentence_nr": 15 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.4779008399806691, + "sentence_nr": 15 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.7243776840931383, + "sentence_nr": 15 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.8642805496461259, + "sentence_nr": 15 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.9506885335787997, + "sentence_nr": 15 + }, + { + "model": 
"microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.9606382935593174, + "sentence_nr": 15 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.8665175293126633, + "sentence_nr": 15 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.8642805496461259, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.342569723746894, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.47156710056973744, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.2319934375578505, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.3367678538644817, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.2261681529206079, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.2647144854968396, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.35554722872430145, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.38873710544604445, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.3720000272862786, + 
"sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.44695658930348453, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.4118588818865406, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.48573453292579605, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.2998354233286452, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.41144215385645566, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.42142495511264777, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.49708063531780444, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.33296735510279596, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.4176386300927819, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.32522259162581857, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.3572499606049779, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": 
"translation", + "metric": "bleu", + "score": 0.3449668516380805, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.4341194278942322, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.36161896085795575, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.5052818563161547, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.2798191316489921, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.31866179281073254, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.3170440263520106, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.45327673850268096, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.15538140800156827, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.22365453282977818, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.1352815632479558, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.2610624350708668, + "sentence_nr": 15 + }, + { + "model": 
"meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.35907597395908514, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.433310273977633, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.33498522957587384, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.4529680464694055, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.30675389390381064, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.49190118767827684, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.13922661372145656, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.27553494979330584, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.3515170550015674, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.37881852198491145, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.30950829536527374, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + 
"score": 0.3839157172568008, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.22141947821999777, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.3633108862011865, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.2957849631521743, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.2872269269040579, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.19474118932727338, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.3257294949902081, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.26505727008662233, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.41342120940573923, + "sentence_nr": 15 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5489548889989204, + "sentence_nr": 16 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5292552311493306, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.43141660874998483, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": 
"translation", + "metric": "chrf", + "score": 0.4251732952639193, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.479859141564773, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.47978767796651084, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.2751349202729036, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.311148395820729, + "sentence_nr": 16 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5435154526669127, + "sentence_nr": 16 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5395341377171525, + "sentence_nr": 16 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5777979902630328, + "sentence_nr": 16 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6331337405946555, + "sentence_nr": 16 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.6121338866063298, + "sentence_nr": 16 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6222767269627676, + "sentence_nr": 16 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5440627210252523, + "sentence_nr": 16 + }, + { + "model": "google/gemini-2.0-flash-lite-001", 
+ "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5801365308278273, + "sentence_nr": 16 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5097049681318312, + "sentence_nr": 16 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5622473457673939, + "sentence_nr": 16 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.42567378467735034, + "sentence_nr": 16 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.470165978205223, + "sentence_nr": 16 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.47594607773277786, + "sentence_nr": 16 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5363851621507516, + "sentence_nr": 16 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.4533373633026252, + "sentence_nr": 16 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5042718376547173, + "sentence_nr": 16 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.8509306641805077, + "sentence_nr": 16 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.9162670716850285, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, 
+ "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.9682566771439106, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.9779127328168863, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.7040822331405046, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7673268835807536, + "sentence_nr": 16 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.8509306641805077, + "sentence_nr": 16 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.9162670716850285, + "sentence_nr": 16 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.7639225615341296, + "sentence_nr": 16 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.8135226479972402, + "sentence_nr": 16 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.6729400620282456, + "sentence_nr": 16 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7157738382386983, + "sentence_nr": 16 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 16 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + 
"sentence_nr": 16 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.6736973998414632, + "sentence_nr": 16 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7157738382386983, + "sentence_nr": 16 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.7640211005075139, + "sentence_nr": 16 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.8179683170395244, + "sentence_nr": 16 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 16 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 16 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.8509306641805077, + "sentence_nr": 16 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.9162670716850285, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.43141660874998483, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.45005622460103567, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.5269212212163125, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.5528502361092263, + "sentence_nr": 16 + }, + { + "model": 
"meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.6736973998414632, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.7157738382386983, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.33491174038847354, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.3646077683106875, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.450293182440332, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.4822292034174927, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.19834633509680927, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.2712763621688402, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.546749262754264, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.5830342194369027, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.2754139367364165, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 
0.34665831783057166, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.42877544777223947, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.43803970127356867, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.43908893511874636, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.4785460996828672, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.5898466143484524, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.6611594562951559, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.44701416909786756, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.5245065297475329, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.31417347869916407, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.3530975487930333, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.6373258340947424, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": 
"te", + "task": "translation", + "metric": "chrf", + "score": 0.6437421244363288, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.4715455630189013, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.543275675805182, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.2807304798995431, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.3418543172008782, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.5397682182130759, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.5703951757357331, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.5446420954986508, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.5662782206307382, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.3378721588486122, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.4362453299175689, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.49288474585647657, + "sentence_nr": 16 + }, + { + 
"model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.5578180330951528, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.36197274748300795, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.36134314178088084, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.17060055774694924, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.2566677182784047, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.5717883675148524, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.640780099960748, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.221071468018936, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.41620491059292214, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.4263215396273059, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 
0.3711481893609263, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.4101392170618868, + "sentence_nr": 16 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7838756540325346, + "sentence_nr": 17 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8813081534414112, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.6486802664285581, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8066891982024211, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7344798528986015, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8855631322316195, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.6486802664285581, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8585894188661937, + "sentence_nr": 17 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7838756540325346, + "sentence_nr": 17 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8813081534414112, + "sentence_nr": 17 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + 
"metric": "bleu", + "score": 0.8434569599214109, + "sentence_nr": 17 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.9123500588239437, + "sentence_nr": 17 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 17 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 17 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7849324644314795, + "sentence_nr": 17 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8934780380564308, + "sentence_nr": 17 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7838756540325346, + "sentence_nr": 17 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8799941663695641, + "sentence_nr": 17 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 17 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 17 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.6809354000776107, + "sentence_nr": 17 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8640242853252401, + "sentence_nr": 17 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7838756540325346, + "sentence_nr": 17 + }, + { + "model": 
"amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8841725044915145, + "sentence_nr": 17 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.39503194300684213, + "sentence_nr": 17 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6916289318228928, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.3094285625931604, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6328843883953666, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.30888995556875376, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6801864286113619, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5512199399393973, + "sentence_nr": 17 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.45862256824436665, + "sentence_nr": 17 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7660160731572102, + "sentence_nr": 17 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.47770079267358434, + "sentence_nr": 17 + }, + { + "model": 
"mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.8053780976175922, + "sentence_nr": 17 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.6259358824502687, + "sentence_nr": 17 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.8067950339997761, + "sentence_nr": 17 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.5296344689827603, + "sentence_nr": 17 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7183083787484315, + "sentence_nr": 17 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.7568440125092788, + "sentence_nr": 17 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.8347576899702969, + "sentence_nr": 17 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.3001800600660342, + "sentence_nr": 17 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6794930944968381, + "sentence_nr": 17 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.18879642915927602, + "sentence_nr": 17 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6584653291380502, + "sentence_nr": 17 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.4697979053121435, + "sentence_nr": 17 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + 
"task": "translation", + "metric": "chrf", + "score": 0.7554660353280213, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.3164389365959547, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.7121929522648841, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.6031798395521694, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.7819677495994619, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.5646631238098637, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.836206348617966, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.36615107686578496, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.696074520676609, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.1543252261021413, + "sentence_nr": 17 + }, + { + "model": 
"meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.4932064977882042, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.6966863379186454, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.7941296295595748, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.5487584440377526, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.8692797308530646, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.8787142254774354, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.944457825946867, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.5463887965663883, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.7033378749149323, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.4912217876159168, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.7991339910300419, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 
0.7251215108320924, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.8334871013677937, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.587725019570444, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.7957550794048827, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.28856268147560865, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.6187787024786685, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.4402122771181734, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.7716344099519011, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.18465966669442654, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.503938463452404, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.17973438065210462, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.5509051817440759, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + 
"task": "translation", + "metric": "bleu", + "score": 0.4809103179432793, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.7499547288317748, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.6244070585346295, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.8433626077474702, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.43660156107563336, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.7165816705519701, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.3748533897614559, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.6863935447402433, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.3607442374649342, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.6876955247522804, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.3718491333506089, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.6941552634040441, + "sentence_nr": 17 + }, + { + "model": 
"meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.5110976370499285, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.842915559657988, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.5591535564944223, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.8079980831297509, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.11809858631445573, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.5943886568930294, + "sentence_nr": 17 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.1423170365140828, + "sentence_nr": 18 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.38605131339325, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.3230989128220882, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.13860487750886114, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.36659667376085786, + "sentence_nr": 18 + }, + { 
+ "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.36295227908523897, + "sentence_nr": 18 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.13860487750886114, + "sentence_nr": 18 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.36118801210741663, + "sentence_nr": 18 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 18 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.40877861250593944, + "sentence_nr": 18 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.16673024281943524, + "sentence_nr": 18 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.3975048254243706, + "sentence_nr": 18 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.11262865194228103, + "sentence_nr": 18 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.36030161445252334, + "sentence_nr": 18 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 18 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.3066941236048102, + "sentence_nr": 18 + }, + { + "model": "qwen/qwq-32b", + 
"bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 18 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 18 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 18 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.409404483413751, + "sentence_nr": 18 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 18 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.3629681915617596, + "sentence_nr": 18 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.12162779391619735, + "sentence_nr": 18 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.4136500403395244, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.1909693288724605, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.4115524982336727, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.14192760409508295, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.3989311390496819, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 
0.20304460086424203, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.4966336271433132, + "sentence_nr": 18 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 18 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.3935462418730863, + "sentence_nr": 18 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 18 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.33523829330170474, + "sentence_nr": 18 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 18 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.3250861966671464, + "sentence_nr": 18 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 18 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.3051626462022859, + "sentence_nr": 18 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 18 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.30944349609311117, + "sentence_nr": 18 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.11556522074454477, + "sentence_nr": 18 + }, + { + "model": "qwen/qwq-32b", + 
"bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.372688132616477, + "sentence_nr": 18 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.22392361812003433, + "sentence_nr": 18 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.460938469666163, + "sentence_nr": 18 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.10704943109718215, + "sentence_nr": 18 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.362953271903766, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.14392660099814805, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.376362134090542, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.11718316363212337, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.3844506520287143, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": 
"ur", + "task": "translation", + "metric": "chrf", + "score": 0.4024646900219184, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.08197539732074254, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.35287478964221025, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.3502198678697797, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.376636825008991, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.30372034137078635, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.21481172921264619, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.4009028477501074, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.15065778147399764, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", 
+ "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.4580508275161034, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.21281360709834968, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.4292702902558381, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.13780534982274106, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.3273034480518148, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.36078900962911326, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.2491467453273127, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.47986445165634506, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.10905122148101043, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.4502571446121065, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 18 + }, + { + "model": 
"meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.274959074733397, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.3607206140473947, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.17796237395371306, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.48209511527864385, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.13644487773607678, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.36491236604183974, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.21850577875478958, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.4494281444270959, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.31361999490423276, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.1222354265296326, + 
"sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.3727252294250617, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.1109484758001971, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.3612426584883393, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.20356858406857398, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.46358366365120834, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.11530762783711283, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.3781690117672006, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.11907182322580316, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.49599003474365394, + "sentence_nr": 18 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.4220964985804286, + "sentence_nr": 19 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.4455062898838481, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + 
"score": 0.32026140564476524, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.4016870075045671, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.34697616124581016, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.40373943351486685, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 19 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.4184617303786878, + "sentence_nr": 19 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.4321132548050678, + "sentence_nr": 19 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.3499900041521066, + "sentence_nr": 19 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.3822330369569219, + "sentence_nr": 19 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.4220964985804286, + "sentence_nr": 19 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.4455062898838481, + "sentence_nr": 19 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", 
+ "score": 0.41428013900466737, + "sentence_nr": 19 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.425713879206717, + "sentence_nr": 19 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.4184617303786878, + "sentence_nr": 19 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.4321132548050678, + "sentence_nr": 19 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5102296603076779, + "sentence_nr": 19 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5412065437629714, + "sentence_nr": 19 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.32282559495424096, + "sentence_nr": 19 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.38266426308756574, + "sentence_nr": 19 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.4230074457298372, + "sentence_nr": 19 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.4432451111759523, + "sentence_nr": 19 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 19 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.6363676859401174, + "sentence_nr": 19 + }, + { + "model": 
"meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6744544901797789, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 19 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 19 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 19 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 19 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 19 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.9271746317040298, + "sentence_nr": 19 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.9736668125871423, + "sentence_nr": 19 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 19 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + 
"score": 1.0, + "sentence_nr": 19 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.6986939462620247, + "sentence_nr": 19 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7821077250864037, + "sentence_nr": 19 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 19 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 19 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 19 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 19 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.9184678024441792, + "sentence_nr": 19 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.8884834862973964, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.3797391466432489, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.3481158447116987, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.31102805827817165, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.3375837027261476, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + 
"bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.19710660977672484, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.2646181750020499, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.3797391466432489, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.3274816319655301, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.28493958837889694, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.35876163607595707, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.2485364833746714, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.2873862688213756, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.41664461891968263, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.42600414573009276, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.2710684964643971, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.2982841390442802, + "sentence_nr": 19 + 
}, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.23005567239800093, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.29184715566281483, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.2741455993358603, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.36403543443534025, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.34279101776553306, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.42600414573009276, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.30955822779938535, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.39546682876478195, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.39475108115635776, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.42154888635191134, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.2781617026804374, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": 
"chrf", + "score": 0.32302333182207527, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.08473168573832755, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.25650903369815853, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.2883871807684295, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.21660761852515356, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.25414220830184964, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.32910644083871465, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.29306886812256966, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.18084108219203518, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.27583433958197495, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.25612947694888455, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + 
"bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.3002607987321696, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.3216291288446239, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.4272249853925079, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.188590266789637, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.26177705380820604, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.3308736026652116, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.3875427536757155, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.28432597056103653, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.35944124408933287, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.23631465024334478, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.2692006325646732, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.259615032947222, + "sentence_nr": 
19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.2855780701161316, + "sentence_nr": 19 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.31343233007308363, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.28662182336952924, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.289946670354745, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.2585958231966256, + "sentence_nr": 20 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.1574562620502688, + "sentence_nr": 20 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.2833933092608246, + "sentence_nr": 20 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "mistralai/mistral-nemo", + 
"bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.2330649391612961, + "sentence_nr": 20 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.2858508520944113, + "sentence_nr": 20 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.17248469309075373, + "sentence_nr": 20 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.3673041887389201, + "sentence_nr": 20 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.28838937143148047, + "sentence_nr": 20 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.25480888745972646, + "sentence_nr": 20 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.14839290005301392, + "sentence_nr": 20 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.29565285341782266, + "sentence_nr": 20 + }, + { 
+ "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.22266775943086, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.10508106635796587, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.2504422832248121, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.22563365567811913, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.12913533075470382, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.24776496881674256, + "sentence_nr": 20 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.08680476715745516, + "sentence_nr": 20 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.22066482174709295, + "sentence_nr": 20 + }, + { + "model": "google/gemini-2.0-flash-001", + 
"bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.12117880855911824, + "sentence_nr": 20 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.32137825349405363, + "sentence_nr": 20 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.20104685618767446, + "sentence_nr": 20 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.25137213099939626, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + 
"bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.28372673673489807, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.164799256779143, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.32187376249458133, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.2969522070783606, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.14440617372843148, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.27200704330334224, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.2442053369522631, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.33050427873462274, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.2133219421911448, + "sentence_nr": 20 + }, + { + "model": 
"meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.3424665224706109, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.8944054777319608, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.24197054442617688, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.21682999057776514, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.3722897460532404, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.24424323100599224, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.2205591704292585, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.3479467223515336, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": 
"meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.1926917267834754, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.4545444680350158, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.17580772500133016, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.32957763052496886, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.2148084015365523, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.40974307981059804, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.29622141199363383, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": 
"meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.24146688269469918, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.09958408398703665, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.22890983822248492, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.20795712301883962, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.282761705091657, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.2551114536415265, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.18112053860965763, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.3266298821510716, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.1423412184218882, + "sentence_nr": 20 + 
}, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.26467729752192487, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.286072901441292, + "sentence_nr": 20 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 21 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.2851456053265138, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.09858834583812252, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7445389400758123, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.9134769668037408, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.2506297252541463, + "sentence_nr": 21 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 21 + }, + { + "model": 
"mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 21 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.8320381765431424, + "sentence_nr": 21 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.9129044064886581, + "sentence_nr": 21 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 21 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 21 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 21 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.23443139907396643, + "sentence_nr": 21 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 21 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 21 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 21 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 21 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 21 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 21 + }, + { + "model": 
"amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 21 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 21 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 21 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.29972668857564216, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.12409597120849801, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.2928237514438983, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.15083364266523736, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.38662429787924074, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.22849324967229787, + "sentence_nr": 21 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 21 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.44152236347960977, + "sentence_nr": 21 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": 
"translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 21 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.2989569143807341, + "sentence_nr": 21 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 21 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 21 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 21 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 21 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 21 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.4042166909648807, + "sentence_nr": 21 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 21 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.3423939053207622, + "sentence_nr": 21 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 21 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.17611268473423294, + "sentence_nr": 21 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 21 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + 
"metric": "chrf", + "score": 0.20441543914149457, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.18928624746011372, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.43639616127375797, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.29213008358451265, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.5828788445270403, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.14679869139754204, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.4021419566569229, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.329340597116918, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.6347143291802012, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.2868708266227936, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.5779499593492363, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + 
"bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.3436610762802303, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.2782087319667435, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.632418768195088, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.3083012995502152, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.6392851743718383, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.23050898626566632, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.48172150010681464, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.1969221590285716, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.5644899370701738, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.32594818888335836, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.49646222671189383, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.4604008032403599, + "sentence_nr": 21 + 
}, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.7444026788985108, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.29161716271402766, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.47302621872495865, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.6854823532900025, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.3546725638586892, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.21468316165048362, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.6851126041819388, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.250737833894674, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.40017617077306594, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.0, + 
"sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.27204995504877727, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.2743963944428051, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.6341922683775969, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.7252122374710612, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.12586347848916266, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.3554854950683664, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.3889045463729729, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.20229280648000492, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.6194717199605934, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + 
"score": 0.0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.31114459650134146, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.11856660123276004, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.34601719602607445, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.40072710492884706, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.7206046648616748, + "sentence_nr": 21 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.22174147515312165, + "sentence_nr": 22 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.2117279815687756, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.33999170096577974, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.29221353951377876, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.3058731661111107, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.2747352174231836, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + 
"metric": "bleu", + "score": 0.0, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 22 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.42736771185803385, + "sentence_nr": 22 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.39727964545172, + "sentence_nr": 22 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.10975022749274138, + "sentence_nr": 22 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.13904829787402162, + "sentence_nr": 22 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.2873518361947954, + "sentence_nr": 22 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.24505805183333226, + "sentence_nr": 22 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.33495074569972355, + "sentence_nr": 22 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.3454509072842772, + "sentence_nr": 22 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.43090467385890824, + "sentence_nr": 22 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.3813511699401743, + "sentence_nr": 22 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0, + 
"sentence_nr": 22 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 22 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.22765977642995502, + "sentence_nr": 22 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.2247283208344801, + "sentence_nr": 22 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.30931906627981315, + "sentence_nr": 22 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.2527893205238235, + "sentence_nr": 22 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 22 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.9210500207490827, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + 
"metric": "chrf", + "score": 0.9069369532463243, + "sentence_nr": 22 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 22 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 22 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 22 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 22 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 22 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 22 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 22 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 22 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.4607778969984477, + "sentence_nr": 22 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.8103868370118212, + "sentence_nr": 22 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 22 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 22 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": 
"bleu", + "score": 1.0, + "sentence_nr": 22 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 22 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.4885014761119101, + "sentence_nr": 22 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.827819363745503, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.17903870455040152, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.18440575845606422, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.1981763713215807, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.2520139548059959, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.17499310607879404, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.18175908515502465, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.10089587713517954, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.11552870044063634, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + 
"metric": "bleu", + "score": 0.3168035112884022, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.30580678632835573, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.10825039887617824, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.1278708456868984, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.29705138694670025, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.2780223931578523, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.18986262747887736, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.18230825914917978, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.42442305789888696, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.42734795538422576, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.18781316135387768, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.16808430602651067, + "sentence_nr": 22 + }, + { + "model": 
"meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.3454156644973841, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.30446460704247824, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.4479597674250984, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.41132840401983517, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.10704445941620296, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.13527356658034445, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.09941527806251362, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.13609735884978696, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.19230259308735756, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.22211286692050705, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.20383889880388334, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + 
"score": 0.17813562619757226, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.2986551380628858, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.30308773908860176, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.10536111661637193, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.13679626017050403, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.19732230687816163, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.22765162763479738, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.3987203877706927, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.39992851145514274, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.09467800236923245, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.11434380596647938, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": 
"ko", + "task": "translation", + "metric": "chrf", + "score": 0.15034676904545285, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.36138016740101575, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.31224382417562974, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.345966570287759, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.2816115803298224, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.3461146475963348, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.30131374176129855, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.1552102601937674, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.1381803727119777, + "sentence_nr": 22 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.4967067363118649, + "sentence_nr": 23 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6330776418175281, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.39501632817024007, + "sentence_nr": 23 + }, + { + "model": 
"meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5629116515332234, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.44774758283371513, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6323151453499094, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.3353166764160673, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5279751808070301, + "sentence_nr": 23 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.3340392563357978, + "sentence_nr": 23 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5542299582982266, + "sentence_nr": 23 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.2288355034549531, + "sentence_nr": 23 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.469883747317403, + "sentence_nr": 23 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5472915485853102, + "sentence_nr": 23 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.7136367183558585, + "sentence_nr": 23 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.6159995640523437, + 
"sentence_nr": 23 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8398584608765305, + "sentence_nr": 23 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5155625728615272, + "sentence_nr": 23 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6435263800797054, + "sentence_nr": 23 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 23 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 23 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 23 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.32206162101132135, + "sentence_nr": 23 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.24125880497129865, + "sentence_nr": 23 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.47825499190432214, + "sentence_nr": 23 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.3292010361291119, + "sentence_nr": 23 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5670300297444607, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.46086624699736534, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": 
"translation", + "metric": "chrf", + "score": 0.6510894943437193, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.5511532346688224, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7550305399541021, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.34537865578685034, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5956718372193373, + "sentence_nr": 23 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.39080227521872696, + "sentence_nr": 23 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.621048393466749, + "sentence_nr": 23 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.2755396296659942, + "sentence_nr": 23 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5033588333252278, + "sentence_nr": 23 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.5795086255869999, + "sentence_nr": 23 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7183582779188291, + "sentence_nr": 23 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.6214211316495574, + "sentence_nr": 23 + }, + { + "model": 
"google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7844755306149331, + "sentence_nr": 23 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.6008383045972477, + "sentence_nr": 23 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7291842011448325, + "sentence_nr": 23 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 23 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 23 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 23 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.25418196696822093, + "sentence_nr": 23 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 23 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5117784549266909, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.274941620352113, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 
0.4651004879148919, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.22743363869750483, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.5634710936922129, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.518836150464752, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.6242496691584447, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.5989032124636781, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.7291306908177887, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.2677353447271197, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.4460422364967209, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.3558785149067877, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.570837784052645, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.2624310277292268, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + 
"task": "translation", + "metric": "chrf", + "score": 0.4915471393606767, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.28489318277723963, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.6000278331909762, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.5728668995816387, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.7460634178179616, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.46086624699736534, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.6510894943437193, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.46507550803536196, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.6687857543858925, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.5155625728615272, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.6435263800797054, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.3639412530979476, + "sentence_nr": 23 + }, + { + "model": 
"meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.654342605671994, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.19882981891203355, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.45714526865696425, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.32269274420690436, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.49704406859630557, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.42849655626964983, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.662646931303495, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.3937441173550755, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.5600824723479425, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.23114663823833642, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.5107406700140826, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 
0.419793811546288, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.6152785242440109, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.46086624699736534, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.6510894943437193, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.46507550803536196, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.6687857543858925, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.2296660762967038, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.5259172094145851, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.39501632817024007, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.5505822266189535, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.3215000448278979, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.5947774549102596, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": 
"gu", + "task": "translation", + "metric": "bleu", + "score": 0.20870371467330825, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.40726160697608454, + "sentence_nr": 23 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.3460579711860666, + "sentence_nr": 24 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.43910565102067395, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.16692770661327389, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.2940239540182693, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.17589867762235817, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.2991014535844428, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.15568794672327907, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.30284457998681635, + "sentence_nr": 24 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.17340302865304977, + "sentence_nr": 24 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.28581037214602456, + "sentence_nr": 24 + }, + { + "model": 
"mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.1782509297990519, + "sentence_nr": 24 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.28710039249342334, + "sentence_nr": 24 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.4901491669500622, + "sentence_nr": 24 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5638035394617603, + "sentence_nr": 24 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.3460579711860666, + "sentence_nr": 24 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.4260473803699743, + "sentence_nr": 24 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.2011131382865372, + "sentence_nr": 24 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.36314253622836745, + "sentence_nr": 24 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.17598839092477797, + "sentence_nr": 24 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.28650792027744043, + "sentence_nr": 24 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 24 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.15997462319973554, + "sentence_nr": 24 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": 
"translation", + "metric": "bleu", + "score": 0.24731742205813823, + "sentence_nr": 24 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.3980108204104697, + "sentence_nr": 24 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.5611872124508993, + "sentence_nr": 24 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7431443902355421, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.4465866985385432, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6260699913485588, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.4465866985385432, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6260699913485588, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.25509991414681377, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.505614827211273, + "sentence_nr": 24 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.21452424426866915, + "sentence_nr": 24 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.44780791445343104, + "sentence_nr": 24 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": 
"zh", + "task": "translation", + "metric": "bleu", + "score": 0.23857086413632697, + "sentence_nr": 24 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.47971483823439903, + "sentence_nr": 24 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.446411600799131, + "sentence_nr": 24 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5816697577563045, + "sentence_nr": 24 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.4664526119731094, + "sentence_nr": 24 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6399376431552989, + "sentence_nr": 24 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.20156032858716424, + "sentence_nr": 24 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.4855075115512445, + "sentence_nr": 24 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.1526900266679129, + "sentence_nr": 24 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.41716995830580594, + "sentence_nr": 24 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 24 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.23259933287371404, + "sentence_nr": 24 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + 
"score": 0.20835831728362864, + "sentence_nr": 24 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.49812931259693377, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.17334119484500185, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.31463785312250736, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.12522096513057643, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.335302418196347, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.9100527513271326, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.9584484214161733, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.20156032858716424, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.35007862377558696, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.3449632275226908, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.5000457205552167, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + 
"task": "translation", + "metric": "bleu", + "score": 0.15568794672327907, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.30284457998681635, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.1529699053146309, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.35702516223197556, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.15975615838102766, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.16928451900289662, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.40173762794247314, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.15568794672327907, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.30284457998681635, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.16038844415635037, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.30359085570641314, + "sentence_nr": 24 + }, + { + "model": 
"meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.3595283251171754, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.5790446318474887, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.20563705341552085, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.3762774944524412, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.16692770661327389, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.2940239540182693, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.14165832410287266, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.24107149684266257, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.1258646065963102, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.24857006332411635, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.2519649154562495, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 
0.44974180175388206, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.3253958243003269, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.45173371737296786, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.27618177741751665, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.4305107132988055, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.1683625745315614, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.31167225759119427, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.13728361101885644, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.3436250633828196, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.16353712933127018, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.32934735468962634, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.48680589893384085, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": 
"it", + "task": "translation", + "metric": "chrf", + "score": 0.6190257724123215, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.1551293035275564, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.2674082220133274, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.26091874007348304, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.17598839092477797, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.28650792027744043, + "sentence_nr": 24 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.1332399603607437, + "sentence_nr": 25 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.19971937750838645, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.1834283688193615, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.22588088032876846, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.12425342874478343, + "sentence_nr": 25 + }, + { + "model": 
"meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.1660533764831914, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.15538689193055893, + "sentence_nr": 25 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.14158209035366248, + "sentence_nr": 25 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.1869416235999822, + "sentence_nr": 25 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 25 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.0578819658044546, + "sentence_nr": 25 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.16170596160446446, + "sentence_nr": 25 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.2206817446345091, + "sentence_nr": 25 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.14914968848461002, + "sentence_nr": 25 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.21702090583674813, + "sentence_nr": 25 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 25 + }, + { + "model": 
"google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.138685682297543, + "sentence_nr": 25 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 25 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 25 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 25 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.1258687317121735, + "sentence_nr": 25 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 25 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.1327332961698289, + "sentence_nr": 25 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.23556366957615363, + "sentence_nr": 25 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.22894370639738668, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.16684195647378827, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.21420692177337528, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.33150414660895594, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 
0.30808679013173407, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.23556366957615363, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.25521078373566897, + "sentence_nr": 25 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.14257880024595157, + "sentence_nr": 25 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.1979524022915653, + "sentence_nr": 25 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.14257880024595157, + "sentence_nr": 25 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.1979524022915653, + "sentence_nr": 25 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.260711748598298, + "sentence_nr": 25 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.28143225165615565, + "sentence_nr": 25 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.25621420675166556, + "sentence_nr": 25 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.32613185963061736, + "sentence_nr": 25 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.21310996044302127, + "sentence_nr": 25 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": 
"chrf", + "score": 0.2620829676028965, + "sentence_nr": 25 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.08892786873926031, + "sentence_nr": 25 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.14069122234920528, + "sentence_nr": 25 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.12273033502938982, + "sentence_nr": 25 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.15070376710164984, + "sentence_nr": 25 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.17376029392152273, + "sentence_nr": 25 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.22421987263715565, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.07369293827420972, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.08728042965046878, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.12416744870990627, + 
"sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.06452498627127952, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.09758509152849626, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.09985298970743903, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.22158794642706012, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.20787168962643957, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.05401240601013853, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.07243671671799473, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.1543646468773244, + 
"sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.09348998462584433, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.130990604448226, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.09885362316286796, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.15900429623613993, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.10903227170832805, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.11481934989482791, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.1745453831609756, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.046916282267844764, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.1250076305588977, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + 
"score": 0.12985392271660248, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.06737080019124615, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.18629057860741663, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.1504281768235603, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.11099491388125307, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.1201070010200949, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.08702826664587757, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.42262353460370816, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + 
"score": 0.3966051357904673, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.09612004569821603, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.10249207815381514, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.1341907303110576, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.11635402454082566, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.1636348970852316, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.06028131279303415, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.0901676620993871, + "sentence_nr": 25 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.6983671476675032, + "sentence_nr": 26 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 
0.6697193437120026, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5809024483660724, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5409616569206442, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5893051076561628, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.555242666304663, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5197038614969076, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.4944106522194635, + "sentence_nr": 26 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5863087308455573, + "sentence_nr": 26 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5756247354842696, + "sentence_nr": 26 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.44763438063632005, + "sentence_nr": 26 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.4327706284829231, + "sentence_nr": 26 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.4562933372999328, + "sentence_nr": 26 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", 
+ "metric": "chrf", + "score": 0.4354000091116894, + "sentence_nr": 26 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.650945489442927, + "sentence_nr": 26 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6025447507087655, + "sentence_nr": 26 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5040260890269513, + "sentence_nr": 26 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.48159079549233025, + "sentence_nr": 26 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.3966338449810425, + "sentence_nr": 26 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.3940867714969907, + "sentence_nr": 26 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.3186669369694382, + "sentence_nr": 26 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.34867169182256896, + "sentence_nr": 26 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.6350785093832516, + "sentence_nr": 26 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6188888500556722, + "sentence_nr": 26 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": 
"meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.7997394936755756, + "sentence_nr": 26 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7811228513409922, + "sentence_nr": 26 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 
0.9660854289024723, + "sentence_nr": 26 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.9613867167137871, + "sentence_nr": 26 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.7158159753911548, + "sentence_nr": 26 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7127947486849641, + "sentence_nr": 26 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.6813410498464633, + "sentence_nr": 26 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6671821168913319, + "sentence_nr": 26 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.45066539224706753, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.4254592023616511, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.45779216736532874, + "sentence_nr": 26 + }, + { + "model": 
"meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.40945502186629257, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.40071581088356767, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.36844216279073794, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.14609848125563302, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.18504017619904287, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.4184317523303411, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.40500270963162277, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.4125433652059801, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.3955923992862865, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.47182538941865537, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.42450279333172475, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 
0.46492333059956836, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.4401112788616263, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.3967795858478363, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.3803134453035716, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.34915707707242977, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.34988691421168616, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.2613611691981996, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.2740054517113319, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.5600863252474344, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.5179797138258272, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.3461243385522883, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.3560268535895035, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", 
+ "task": "translation", + "metric": "bleu", + "score": 0.43650008892828823, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.42551924250056755, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.504580863725975, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.46703102558879955, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.36954961729302616, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.34760122558190465, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.3803026331533805, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.362200056491149, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.24777987943516128, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.2952194113831596, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.5258092834799059, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.4981801549352249, + "sentence_nr": 26 + }, + { + 
"model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.48625052891235754, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.4290939038872796, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.4045007320789693, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.4098113348256027, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.44158642009003995, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.40903259597127894, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.4946406341236379, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.4719975064311173, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.41182432358851845, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.4034715718148006, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.3693186725771347, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", 
+ "score": 0.36304188784855995, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.3692675983091899, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.37402683054534963, + "sentence_nr": 26 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7645786047678913, + "sentence_nr": 27 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8655501219338723, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.8643729226327672, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.9215030582508996, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.8643729226327672, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.9215030582508996, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7858164289172753, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8872272977237059, + "sentence_nr": 27 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.8643729226327672, + "sentence_nr": 27 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + 
"task": "translation", + "metric": "chrf", + "score": 0.9215030582508996, + "sentence_nr": 27 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.6898913050782208, + "sentence_nr": 27 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8620687741940413, + "sentence_nr": 27 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.6898913050782208, + "sentence_nr": 27 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8528837782425732, + "sentence_nr": 27 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7708719635370461, + "sentence_nr": 27 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8793197587693242, + "sentence_nr": 27 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7708719635370461, + "sentence_nr": 27 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.888538633093067, + "sentence_nr": 27 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.6840689169974626, + "sentence_nr": 27 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8314419144081646, + "sentence_nr": 27 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5819799380263497, + "sentence_nr": 27 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + 
"score": 0.7407958979814505, + "sentence_nr": 27 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7645786047678913, + "sentence_nr": 27 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8655501219338723, + "sentence_nr": 27 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.41098733201100757, + "sentence_nr": 27 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.651283133493195, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.6152755816095169, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7669297251133314, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.4250002996145258, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6670552714553488, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.3735617779670567, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5773479111816255, + "sentence_nr": 27 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.5543498698280007, + "sentence_nr": 27 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": 
"chrf", + "score": 0.7266847297604082, + "sentence_nr": 27 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.3716332023564544, + "sentence_nr": 27 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6132388888021502, + "sentence_nr": 27 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.6986939462620247, + "sentence_nr": 27 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.8497711598086016, + "sentence_nr": 27 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.5072570733389083, + "sentence_nr": 27 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7124868368374351, + "sentence_nr": 27 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.5907596734005102, + "sentence_nr": 27 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7837270250239556, + "sentence_nr": 27 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.10008881112800158, + "sentence_nr": 27 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.29125356488795046, + "sentence_nr": 27 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 27 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.041649157343430596, + "sentence_nr": 
27 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.6587480145435196, + "sentence_nr": 27 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7917841426705801, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.7446828000198126, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.885521980076414, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.6466833757622275, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.7737914417145209, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.4447278656331358, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.6742569711624775, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.600047216971444, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.7511423755179258, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 27 + 
}, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.3382340617900419, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.6182585373365673, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.6069548573053054, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.7630436854704967, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.40482952759410495, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.6241130944295542, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.5021718181363274, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.697189669759932, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.7858164289172753, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.8717639062922423, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.5731680012014568, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", 
+ "score": 0.746935173521359, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.7224037170215811, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.8452672523905139, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.5724496367057007, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.7350859720106757, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.42250552136302394, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.6425389837629188, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.7645048342610411, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.876234192352485, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.45751787171307623, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.6647794363792763, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.7623067286250759, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + 
"bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.8682092620191191, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.46189821859121283, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.6442319235751083, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.3931991982536581, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.6422735790483707, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.44644290381704027, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.6892051604181435, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.4000177797533498, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.645169701736652, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.4479818542603719, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.6761961025641056, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.7123871749204508, + "sentence_nr": 27 + 
}, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.8331784519293958, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.5749089871602278, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.7211428196508521, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.38506289173931413, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.6152360906748179, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.6231488481063673, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.7734960210241439, + "sentence_nr": 27 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.4460616097899727, + "sentence_nr": 28 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6833569517560225, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.4460616097899727, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6833569517560225, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 
0.4460616097899727, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.693261298341864, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.4460616097899727, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.693261298341864, + "sentence_nr": 28 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.6745016003476486, + "sentence_nr": 28 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8511670783317596, + "sentence_nr": 28 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.32329508170352383, + "sentence_nr": 28 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6141330847741713, + "sentence_nr": 28 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.4460616097899727, + "sentence_nr": 28 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6833569517560225, + "sentence_nr": 28 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.3837983925863447, + "sentence_nr": 28 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6366757448341102, + "sentence_nr": 28 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": 
"bleu", + "score": 0.6745016003476486, + "sentence_nr": 28 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8511670783317596, + "sentence_nr": 28 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 28 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 28 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.6667025833042813, + "sentence_nr": 28 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.746973053424487, + "sentence_nr": 28 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.6745016003476486, + "sentence_nr": 28 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8511670783317596, + "sentence_nr": 28 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.217554942150074, + "sentence_nr": 28 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.4859163400220353, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.3008656294855478, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5198655773563042, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.3008656294855478, + "sentence_nr": 28 + }, + { + "model": 
"meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5198655773563042, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.203264842568494, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.4965705242699611, + "sentence_nr": 28 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.32079058840140134, + "sentence_nr": 28 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5094305382960898, + "sentence_nr": 28 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.23693055763743093, + "sentence_nr": 28 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.4968400811224627, + "sentence_nr": 28 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.32079058840140134, + "sentence_nr": 28 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5094305382960898, + "sentence_nr": 28 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.35479105265934485, + "sentence_nr": 28 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.4725761870926308, + "sentence_nr": 28 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.3301899334885226, + "sentence_nr": 28 
+ }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5632801217523468, + "sentence_nr": 28 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 28 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 28 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.1923904871441659, + "sentence_nr": 28 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5825915593253297, + "sentence_nr": 28 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.32079058840140134, + "sentence_nr": 28 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5094305382960898, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.4892199210635081, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.6263002679299042, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.09147827112247602, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.3360691966057836, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.2966218714191134, + "sentence_nr": 28 + }, + { + "model": 
"meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.5348497180679597, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.3008656294855478, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.5209701084013916, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.3254074668234594, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.540582703782851, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.22935466869603194, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.6357138961264384, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.4460616097899727, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.6833569517560225, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.38769943713308697, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.6179897670313796, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 
0.35964066074252593, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.5418421848087059, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.31666472263798334, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.5096984883597744, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.2656621439255861, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.47187800221660153, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.41583634222861793, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.6558319092753532, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.26633048164380024, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.5038200170930055, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.5371525807924681, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.7677378485184402, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": 
"mr", + "task": "translation", + "metric": "bleu", + "score": 0.15274299622833287, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.4692950277268683, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.30626101600123445, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.583891679561264, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.18137691349228668, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.4586072719105437, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.23443677523946913, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.5163278972706644, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.33876931708826047, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.550413577565279, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.14207405313947058, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.47874702297210975, + "sentence_nr": 28 + }, + { + 
"model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.2539342198718324, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.46375067718601715, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.200726550812963, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.41645295439394076, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.42995245074388394, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.6515566568079457, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.2834052290575623, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.4974109921343301, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.19454290935168927, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.49909763892228687, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.3837983925863447, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", 
+ "score": 0.6379993550810827, + "sentence_nr": 28 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.1481394578697113, + "sentence_nr": 29 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.30063818852404856, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.14216645907653844, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.2737034564138708, + "sentence_nr": 29 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7778111223054219, + "sentence_nr": 29 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8190064480412373, + "sentence_nr": 29 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7778111223054219, + "sentence_nr": 29 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8190064480412373, + 
"sentence_nr": 29 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7778111223054219, + "sentence_nr": 29 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8190064480412373, + "sentence_nr": 29 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.14939354788683526, + "sentence_nr": 29 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.29041654772860626, + "sentence_nr": 29 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7778111223054219, + "sentence_nr": 29 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8190064480412373, + "sentence_nr": 29 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 29 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 29 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 29 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 29 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7778111223054219, + "sentence_nr": 29 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8190064480412373, + "sentence_nr": 29 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + 
"metric": "bleu", + "score": 0.4125519163596689, + "sentence_nr": 29 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5539867049403877, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.5420662441541858, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5445089463670787, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.40919282596076484, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5542936932152527, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.4125519163596689, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5539867049403877, + "sentence_nr": 29 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.4125519163596689, + "sentence_nr": 29 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5539867049403877, + "sentence_nr": 29 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.5928902071159559, + "sentence_nr": 29 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.647817438132439, + "sentence_nr": 29 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + 
"task": "translation", + "metric": "bleu", + "score": 0.5928902071159559, + "sentence_nr": 29 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.647817438132439, + "sentence_nr": 29 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.4125519163596689, + "sentence_nr": 29 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5539867049403877, + "sentence_nr": 29 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.4125519163596689, + "sentence_nr": 29 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5539867049403877, + "sentence_nr": 29 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.34641959937802264, + "sentence_nr": 29 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.47549559716182727, + "sentence_nr": 29 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.4125519163596689, + "sentence_nr": 29 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5539867049403877, + "sentence_nr": 29 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.4125519163596689, + "sentence_nr": 29 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5539867049403877, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", 
+ "score": 0.42461633178803443, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.5603699277937889, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.42254876310519374, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.5561399558171133, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.2340216139262901, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.4522093023662336, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.4132352454218328, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.5544725906870476, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.42254876310519374, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.5561399558171133, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.3951500216160541, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.6089660957340174, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + 
"bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.42254876310519374, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.5561399558171133, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.42282359171428024, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.5395092365663595, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.35412968165085734, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.4985795126785612, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.1598921499894403, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.390187618292215, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.2400540439585043, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.49297433772099697, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.4806604068305994, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.664228268001068, + "sentence_nr": 29 + }, 
+ { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.42254876310519374, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.5561399558171133, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.2340216139262901, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.45184273575809186, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.7778111223054219, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.8190064480412373, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.16533113836624475, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.4074791764578974, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.28547397706062927, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.4838477808123968, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.6053011982655683, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", 
+ "score": 0.652613765735072, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.4229247984636106, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.556465536088555, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.42254876310519374, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.5561399558171133, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.3471790743028735, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.4458106286047354, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.3555508425572384, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.5387745992013905, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.1709686260975486, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.3940091304204109, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.25958657290343434, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + 
"bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.43162699627918094, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.2213908395073965, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.4213527844474163, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.39696685122270786, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.5497060467823045, + "sentence_nr": 29 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + 
"metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + 
"metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", 
+ "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + 
"score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": 
"classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": 
"classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + 
"model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": 
"classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + 
"bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": 
"microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + 
"metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": 
"meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 
3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + 
"sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", 
+ "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + 
"sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": 
"accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": 
"accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": 
"meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": 
"classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", 
+ "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + 
"bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 
1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": 
"meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + 
"model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + 
}, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + 
"task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + 
"sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + 
"sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": 
"accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": 
"meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + 
"metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": 
"classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + 
"bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 
1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": 
"en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + 
"bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": 
"meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": 
"classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + 
"model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + 
"sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + 
"metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + 
"model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": 
"classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 11 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 11 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 11 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 11 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + 
"bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 11 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 11 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 11 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 11 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 11 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + }, + { + "model": 
"meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": 
"classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + 
}, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 12 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 12 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + 
"sentence_nr": 12 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 12 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 12 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 12 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 12 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", 
+ "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { 
+ "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": 
"classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": 
"mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 13 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 13 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, 
+ "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": 
"meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": 
"classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 14 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 14 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": 
"mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 14 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 14 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, 
+ "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": 
"meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": 
"classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 15 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 15 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 15 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 15 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 15 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 15 + }, + { + "model": 
"mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 15 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 15 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 15 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 15 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, 
+ "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + }, + { + "model": 
"meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": 
"classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 16 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 16 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": 
"mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 16 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 16 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, 
+ "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": 
"meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": 
"classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 17 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": 
"mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 17 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, 
+ "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": 
"meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": 
"classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 18 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": 
"mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 18 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 18 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, 
+ "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": 
"meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 18 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": 
"classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 19 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 19 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 19 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 19 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 19 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 19 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 19 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 19 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 19 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 19 + }, + { + "model": 
"mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 19 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 19 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 19 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 19 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 19 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 19 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 19 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, 
+ "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 19 + }, + { + "model": 
"meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 19 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": 
"classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 20 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 20 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 20 + }, + { + "model": 
"mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 20 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, 
+ "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": 
"meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": 
"classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 21 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 21 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 21 + }, + { + "model": 
"mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 21 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 21 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, 
+ "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + }, + { + "model": 
"meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": 
"classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 22 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 22 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 22 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 22 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 22 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + }, + { + "model": 
"mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 22 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 22 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, 
+ "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + }, + { + "model": 
"meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 22 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": 
"classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 23 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": 
"mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 23 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, 
+ "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": 
"meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": 
"classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 24 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": 
"mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 24 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, 
+ "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": 
"meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": 
"classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 25 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 25 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 25 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 25 + }, + { + "model": 
"mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 25 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 25 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, 
+ "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 25 + }, + { + "model": 
"meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": 
"classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 26 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 26 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": 
"mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 26 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 26 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, 
+ "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": 
"meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": 
"classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 27 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 27 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": 
"mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 27 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 27 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 27 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 27 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, 
+ "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": 
"meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": 
"classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 28 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": 
"mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 28 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, 
+ "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": 
"meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": 
"classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 29 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 29 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + }, + { + "model": 
"mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 29 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 29 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, 
+ "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + }, + { + "model": 
"meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 29 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9411583614202783, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9389202454786235, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": 
"en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8775848642818888, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8618703443763697, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7861888156926622, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7987489460131649, + 
"sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9319748402595084, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7613425680699503, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8493237569441244, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9020031517329425, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8493237569441244, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.883570112979728, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8263460336753243, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8060322164809728, + 
"sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8980680846396624, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9491059403137463, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9664300701360793, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9457224261353452, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9708225134054753, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 
0.9419324607589119, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9619002332717353, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9189927159116271, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.895905738615658, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8719916488298841, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9397108105925289, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.884345665982421, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9584454525436005, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": 
"language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9643081480127652, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9067144042813564, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8781616442886918, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9745733081082687, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9237743711831492, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9659571253320222, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9044755244774213, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": 
"chrf", + "score": 0.9016506657203592, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9259203238585231, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9226314544302758, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.6237003645369218, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.919365977563579, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9113270242697518, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.898943894327586, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 
0.9736119227904283, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9415432301630186, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.973004167300919, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9617726716367615, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8788632576179716, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9442690941930104, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9167527970009353, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9264966822048945, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9760432643638268, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9290639912797567, + "sentence_nr": 1 + }, + { + "model": 
"meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9451284616565533, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9571970948049097, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9428452278208271, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.924510998540744, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9354255661287414, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9038448099971822, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9290214610132344, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9359307328554756, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9462257677914746, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + 
"metric": "chrf", + "score": 0.9685511109758306, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9466350739636148, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7785501063601203, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8677672451180615, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9519685270619841, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.5524309559543085, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + 
"sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8979970994003059, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8979970994003059, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9598023304313453, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8678877090803476, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.3628854370408249, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8866932684030095, + "sentence_nr": 2 + }, + { + "model": 
"amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7932574787392968, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8840632918991035, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9244224424282228, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7493760739956499, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9434070582654602, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8626111481890223, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9742381587466754, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + 
"sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9614829239512629, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9634058264556766, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 0.846746937646691, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9416090102549223, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9586487245465463, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8628736669093499, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": 
"chrf", + "score": 1.0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8883148663773122, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 0.921000444185013, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.964284245003951, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.899852954654377, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.5884852453065169, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8943359440390058, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + 
"score": 1.0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.6239646156236577, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8782485779028959, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9219735185328113, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8379214027434272, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9171135147465285, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8793006100154936, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.6764135013792538, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8320911917964368, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8320911917964368, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + 
"score": 1.0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9020259333664543, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8443316591536836, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9062739514559724, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9243814194896306, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9257122714800141, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + 
"sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9048929676970495, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9233238051356927, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8961117810241208, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9137011072166213, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9144918070375806, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9447475462972004, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9418568225974095, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", 
+ "metric": "chrf", + "score": 0.8631885674989124, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9540570534869818, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9356691952085903, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8263666332486633, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9187937618702817, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.6492261286778312, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + 
"metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.4782990117524071, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8728890059382535, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7924841060781368, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": 
"google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8728890059382535, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8085699807438939, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9309167160514913, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8335210974928002, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9407617520385465, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9009704508776215, + "sentence_nr": 4 + }, + { + "model": 
"meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.886161550229872, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8864780713525466, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8619950335517561, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 0.877644990158928, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9473578431592224, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + 
"model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8989284887461744, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8982857165205713, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9421743042333945, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.909430339396572, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9114715597392106, + "sentence_nr": 5 + }, + { + 
"model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9221676855227006, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.903310364652346, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.43631872104818037, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.725100223395414, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8342041754812477, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7541096773855238, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9352893606252747, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", 
+ "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7137044016250488, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8459329201101423, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9155785169978052, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.454243405917021, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.4367071875067552, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9053865214400596, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9344907300105301, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 0.941467473244312, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", 
+ "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8759462570863868, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9116059567890715, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 0.95453015576562, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9271804273091313, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9494380676747487, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8907525765155897, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9420326057327402, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": 
"meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8729192735278123, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 0.840210783941434, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8830406923187026, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8705872791986208, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9129896861855028, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9775140091004713, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.931908394385036, + 
"sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.958499216692883, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9787648208394673, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8848447424869419, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9476480635849643, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8420296194650692, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9645398026978572, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.976975965491712, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9575751193892209, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.917870378110458, + 
"sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.917870378110458, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.922108923148009, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9367021384173281, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.917870378110458, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9136709169732016, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9031487241080103, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.922108923148009, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9717329164232313, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9810420842974353, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 
0.9296061535584738, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9548717794727779, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9723617284409432, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9433216405879152, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9253992588631311, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7833761650543694, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8958698547783525, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9659983030155975, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9368374793769542, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": 
"mr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9292848975349729, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9705333075369675, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9560908971572966, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9288860917142431, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9402643484548583, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9303023646781129, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9076656012518489, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9272618174968876, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9630829363546703, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9437691960187881, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9233897890679653, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + 
"score": 0.9217593594034571, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9429459010031568, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9680340601535599, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9444947592571505, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9524237679532525, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8521740000505951, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9147273981117778, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9353915284262971, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9575256886848735, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9544425909905248, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + 
"metric": "chrf", + "score": 1.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.887089742205764, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8937272463225717, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9221577416896909, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7360571605491374, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9424882191492142, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9158962896380519, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9016185053131788, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9519313199322048, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9002497361613263, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + 
"metric": "chrf", + "score": 0.9424882191492142, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9404564646985731, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9525612663771642, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9416090102549223, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.946182450185975, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8827665860178672, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9242269657430007, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9209375409360453, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9453162319718537, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9354735336178899, + "sentence_nr": 7 + }, + { + "model": 
"meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9650606723493668, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 0.937172702008466, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9037456319061896, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9527540439558733, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9777992945719618, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9483614149601093, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9630476322301069, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9090634311284931, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9592439701684463, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9352813563171796, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9578898822826803, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + 
"task": "language_modeling", + "metric": "chrf", + "score": 0.9349087092124988, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9553475775967099, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9426144990998162, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9455357310467346, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9950087915805451, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9950087915805451, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9359599516797827, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9950087915805451, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8529883661830301, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9313047211019367, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9311406569876187, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9392038901097501, + 
"sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9504743930445531, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9950087915805451, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9283998656503502, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9934034758807603, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9131528589305679, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9917679206284817, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9566767123929576, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9359924521743563, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8893588081911743, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + 
"sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9583698738001583, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9934034758807603, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.946392812169666, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 0.911875333930421, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9169315433407361, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9541325707307038, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9648123726963476, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8370298547932784, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9334875203861144, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": 
"chrf", + "score": 0.9413496332501932, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9956823103485622, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9457390517164731, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9671298665063969, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9336521523423332, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9502062892893858, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9333019767772176, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9037394051488277, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9283644587512466, + "sentence_nr": 8 + }, + { + "model": 
"meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9237582925385585, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8995566191566017, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 0.896344147038989, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 0.09821094254330615, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9548273305811203, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9251737690567995, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9275689564213165, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9272442008199501, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9520060001290835, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": 
"chrf", + "score": 0.9058859200742604, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8789724147701462, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9137645544850267, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8969027357279203, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9520060001290835, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9275374047069039, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8772309014828462, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9140052999897977, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.868350408637765, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": 
"chrf", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7563541659131354, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8578315979157695, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8441075622700097, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.23829288001976573, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9407267756704489, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.831845583109951, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9530684796567226, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8984174935165463, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.946008414943598, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + 
"metric": "chrf", + "score": 0.9285885624039975, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9645189965938258, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9601667560566091, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9113133701465544, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9363094557613988, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9499594621802195, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8850558582872771, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9413520522974334, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8953760832780698, + "sentence_nr": 9 + }, + { + "model": 
"meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9516191368774216, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 0.873135905690596, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9331628274049639, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9350921637704382, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9398175409358328, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9675093986501344, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + }, + { 
+ "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9282207391671503, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.957452925924953, + "sentence_nr": 10 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8937237551170429, + "sentence_nr": 10 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 10 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9256331955884847, + "sentence_nr": 10 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.904390835311888, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8995954000535624, + "sentence_nr": 10 + }, + { + "model": 
"meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.928962868887516, + "sentence_nr": 10 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9339798045072082, + "sentence_nr": 10 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8008809042180175, + "sentence_nr": 10 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9240001424211951, + "sentence_nr": 10 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.3493344613894351, + "sentence_nr": 10 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 0.92829327413418, + "sentence_nr": 10 + }, + { + "model": 
"meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9359271530286619, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9641555435524619, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 0.90719289051837, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8543701176038877, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9045960456690756, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9576659929734302, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9445842802137389, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 0.917893569547509, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": 
"chrf", + "score": 0.9031282594956593, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9325823323160847, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9171277146973622, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9631220314707449, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9125575210703364, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9671298665063969, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8583796678495444, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 0.917870378110458, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9075511178990168, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8942877287874674, + "sentence_nr": 10 + }, + { + "model": 
"meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8881782096383685, + "sentence_nr": 11 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8452994228892592, + "sentence_nr": 11 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 11 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 11 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 11 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 11 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 11 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.857664755026069, + "sentence_nr": 11 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": 
"language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 11 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7687402404428638, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9425182378610694, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8221659843346086, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8685375697135141, + "sentence_nr": 11 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 11 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7860944644568774, + "sentence_nr": 11 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 11 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7829829019188287, + "sentence_nr": 11 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9425182378610694, + "sentence_nr": 11 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.289269703803095, + "sentence_nr": 11 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7829829019188287, + "sentence_nr": 11 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + 
"task": "language_modeling", + "metric": "chrf", + "score": 0.9425182378610694, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 0.915813486906383, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.928671169616198, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9195852720074569, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9260563505342738, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8580715674095071, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8991782906832555, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 11 + }, + { + "model": 
"meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9549429726485847, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8571447284090962, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 0.953599772014362, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9382091007325469, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9125682774652475, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9084959093441131, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + 
"sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9389584881035126, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8968120926569282, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8757339860702672, + "sentence_nr": 11 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9742989957563788, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9854564066904739, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.938338375356983, + "sentence_nr": 12 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9363458435045497, + "sentence_nr": 12 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9275189832478317, + "sentence_nr": 12 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9680610688075657, + "sentence_nr": 12 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9458276502828801, + "sentence_nr": 12 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": 
"language_modeling", + "metric": "chrf", + "score": 0.9555270393882619, + "sentence_nr": 12 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.774972667720128, + "sentence_nr": 12 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9238483556315539, + "sentence_nr": 12 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 12 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9292605756517186, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8710905917506855, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8877998658561537, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9408832971568818, + "sentence_nr": 12 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8758560882945217, + "sentence_nr": 12 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9047504210526172, + "sentence_nr": 12 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9149458726191051, + "sentence_nr": 12 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9497380252636716, + "sentence_nr": 12 + }, + { + "model": 
"google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9247145535687903, + "sentence_nr": 12 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 12 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8829314518141973, + "sentence_nr": 12 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9497380252636716, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9369900232316837, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9584772514045287, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9656526051593539, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9262800142753679, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9178799098053634, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8988056403515298, + 
"sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9240902217687106, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9454713149117651, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9457650793019858, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9115531547253959, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9403725471773088, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9107758326980321, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9251111872988325, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9267004903727016, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9652440580136615, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.924254800539438, + "sentence_nr": 12 + }, + { + "model": 
"meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9054967244578502, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 0.886673201587762, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9082204179924286, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9665046359304257, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9492870842156111, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9495327576081029, + "sentence_nr": 12 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9605742681789634, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9410712595774171, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.971921146040729, + "sentence_nr": 13 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + 
"metric": "chrf", + "score": 0.8360964435901039, + "sentence_nr": 13 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9278436686065653, + "sentence_nr": 13 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 13 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9540941235545723, + "sentence_nr": 13 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7765803419515074, + "sentence_nr": 13 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9368660209060221, + "sentence_nr": 13 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9179315685239186, + "sentence_nr": 13 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9198867501155861, + "sentence_nr": 13 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9357668560693397, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.880651835588671, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 13 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": 
"language_modeling", + "metric": "chrf", + "score": 0.9322025130978147, + "sentence_nr": 13 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8666701669384438, + "sentence_nr": 13 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9301584319196643, + "sentence_nr": 13 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 13 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9201441893603447, + "sentence_nr": 13 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.4518476286184633, + "sentence_nr": 13 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8817151383770689, + "sentence_nr": 13 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9719892276800867, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9232252378020026, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.90340499273861, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": 
"fr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9445601279006905, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9284637794790105, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9506720475284802, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9650672132857259, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 0.935825271074837, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9417006532894496, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9180957642017807, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9336273124319283, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9199623581249377, + 
"sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9420383150390214, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9169222881606529, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9358954768171188, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9210475526688618, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 0.900422383617428, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9665042848270522, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9195975724156285, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9482591669689567, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 0.894400898846725, + "sentence_nr": 13 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + 
"task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9683895601588671, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 14 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 14 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.974733551222386, + "sentence_nr": 14 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 14 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 14 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 14 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 14 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.935724475087967, + "sentence_nr": 14 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.945278116491169, + "sentence_nr": 14 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + 
"sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.913976993531483, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 14 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 14 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9206503738833902, + "sentence_nr": 14 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8879551150411227, + "sentence_nr": 14 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9093507960484853, + "sentence_nr": 14 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 14 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.44325871778061554, + "sentence_nr": 14 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8629899790604912, + "sentence_nr": 14 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8839868610728687, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9659019608247615, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + 
"score": 1.0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9612040783142544, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9355702448711621, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8575724679460186, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 0.919154316989783, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9107041155041439, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8860042875765471, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9163443895096822, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + 
"task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9513360683724416, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9506442510575418, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9210869399305139, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8602965545640948, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8912610518101419, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 0.857937519719319, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9528771181894694, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9241995664234885, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9432104991415542, + 
"sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8927784164557715, + "sentence_nr": 14 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8681309346882299, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9045257596276787, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 15 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7552111299277484, + "sentence_nr": 15 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.82396628763246, + "sentence_nr": 15 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 15 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 15 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 15 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 15 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 15 + }, + { + "model": 
"amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 15 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8809116426093319, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9069369532463243, + "sentence_nr": 15 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 15 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8255413975339149, + "sentence_nr": 15 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9116712045344968, + "sentence_nr": 15 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8855094439275503, + "sentence_nr": 15 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8809116426093319, + "sentence_nr": 15 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.5085021700346579, + "sentence_nr": 15 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8155954216287978, + "sentence_nr": 15 + }, + { + "model": 
"amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8849766832597384, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9321985099431636, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9158869153954171, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8940299169999223, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9029209331114941, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9434784706316768, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9504499063681887, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8103402263404181, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9033542015144801, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + 
"metric": "chrf", + "score": 1.0, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8920851535963175, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9012698346023688, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8815241253287673, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 0.955434974676454, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9190034267575142, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9028341607528202, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7933760889502307, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + 
"bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9669111778196173, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9183552099282611, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9213964969470535, + "sentence_nr": 15 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9438561056375272, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9245427558640842, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9466217999433078, + "sentence_nr": 16 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 16 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8600910973378976, + "sentence_nr": 16 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 16 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 16 + }, + { + "model": 
"google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 16 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.5465479162881712, + "sentence_nr": 16 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.908088143295894, + "sentence_nr": 16 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8689979953554426, + "sentence_nr": 16 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8837997874830685, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 16 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9430526976186369, + "sentence_nr": 16 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7213258253735133, + "sentence_nr": 16 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8583796678495444, + "sentence_nr": 16 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 16 + }, + { + "model": 
"google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 16 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.5352913894873965, + "sentence_nr": 16 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7506613813658406, + "sentence_nr": 16 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9120029292560927, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 0.969258616291086, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9359933426460225, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8446197069920836, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9665537794677691, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7519024768911576, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9419599049218603, + 
"sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9100379761498075, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9243062555931161, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9232535952320629, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9430158926147498, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8923268998495886, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9316958873367511, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9441083273271286, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 0.899852954654377, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9451690574618664, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9470556595464068, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 16 + }, + { + "model": 
"meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8625414653847894, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8658510104009289, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 0.938651167013012, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9362303281043904, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9288883358178652, + "sentence_nr": 16 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 17 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7378741057437793, + 
"sentence_nr": 17 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.898904151376881, + "sentence_nr": 17 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 17 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 17 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 17 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 17 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8446522700991944, + "sentence_nr": 17 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9053865214400596, + "sentence_nr": 17 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8700885813654318, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 17 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9331139325257429, + "sentence_nr": 17 + }, + { + "model": 
"mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8860497305091617, + "sentence_nr": 17 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 17 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8222704990602537, + "sentence_nr": 17 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8402559609277754, + "sentence_nr": 17 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7386088026745246, + "sentence_nr": 17 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.06557474419143802, + "sentence_nr": 17 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8700885813654318, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8771568927591851, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8869070241487921, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 17 + }, + { + 
"model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8173012945645394, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8220012279932035, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8449397341788647, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9239069749524619, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8972504357155736, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 0.6602446784708298, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 
1.0, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8667833154965509, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7306831212016971, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7306831212016971, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7406377967705062, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8509760908759664, + "sentence_nr": 17 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.96926930549605, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + 
"metric": "chrf", + "score": 1.0, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 18 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8641726957145408, + "sentence_nr": 18 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 18 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9637804258017773, + "sentence_nr": 18 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 18 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 18 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9240863542577373, + "sentence_nr": 18 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9450374119495017, + "sentence_nr": 18 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.96926930549605, + "sentence_nr": 18 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9424882191492142, + 
"sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9424882191492142, + "sentence_nr": 18 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9392663489644577, + "sentence_nr": 18 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8389799674466019, + "sentence_nr": 18 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9253208187778743, + "sentence_nr": 18 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9221577416896909, + "sentence_nr": 18 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9221577416896909, + "sentence_nr": 18 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 18 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 18 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9144266092886102, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9500117624130617, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + 
"score": 0.905862662289465, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9003734503251455, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 0.858544407149412, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9281598514152588, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 0.948121913854874, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9629589146416885, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9580736862318411, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9708835294542548, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9234823141384267, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9939521304203686, + "sentence_nr": 18 + }, + { + "model": 
"meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9474838221026617, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9488355997601815, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9424390135303181, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9711070259637357, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9237920416869381, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8942780008373756, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8468261925085733, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8976119317111001, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9527352893094178, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9510981354135275, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9521144628004171, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": 
"gu", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 18 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9774592733638915, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9665042848270522, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9806060444395596, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 19 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9049668032095894, + "sentence_nr": 19 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9665042848270522, + "sentence_nr": 19 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 19 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 19 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9667317239059525, + "sentence_nr": 19 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 19 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9159800198090925, + "sentence_nr": 19 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": 
"language_modeling", + "metric": "chrf", + "score": 0.9667317239059525, + "sentence_nr": 19 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8925738398388144, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9058585844143391, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8888787903169728, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8925738398388144, + "sentence_nr": 19 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9128855680689195, + "sentence_nr": 19 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9272821491047395, + "sentence_nr": 19 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 19 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9413354408985303, + "sentence_nr": 19 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 19 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.46619006556188114, + "sentence_nr": 19 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.913896513382741, + "sentence_nr": 19 + }, + { + "model": "amazon/nova-micro-v1", + 
"bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9058585844143391, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9557922260754473, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9391656780027514, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9260113686541587, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9419307613884336, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9845996986850503, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9255228522887315, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.889174440461237, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9496761617043387, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9322360743819351, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + 
"score": 0.935492418630274, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9456325305487512, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9325466173278317, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9240800356922247, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9361690788124847, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.938043640398588, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.901373116210745, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9531605377803356, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9132591460407243, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 19 + }, + { + "model": 
"meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9494481589794223, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9415361564397403, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 0.897450557161678, + "sentence_nr": 19 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.937002127196651, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9435408381256087, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9421449698305296, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 20 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 20 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9607456319189528, + "sentence_nr": 20 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 20 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 
20 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 20 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.5467617051776391, + "sentence_nr": 20 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 20 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 20 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8969209805167669, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9060555921929084, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8969209805167669, + "sentence_nr": 20 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9096430262961498, + "sentence_nr": 20 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7983940190154283, + "sentence_nr": 20 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9132591460407243, + "sentence_nr": 20 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9204057102575467, + "sentence_nr": 20 + 
}, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 20 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.4968312722246179, + "sentence_nr": 20 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8001971757912975, + "sentence_nr": 20 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9204057102575467, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.95112146871187, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 0.949624286506194, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9588139991437585, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9236414681715879, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9453633691396565, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9278367059866518, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9302237306555959, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + 
"metric": "chrf", + "score": 0.8441460025255829, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9470556595464068, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 0.951863030034636, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8944443568631728, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9082204179924286, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.90717359411325, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9014597856352894, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9250084453288043, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 0.95462554022758, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9318340131711181, + "sentence_nr": 20 + }, + { + 
"model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9736147802901586, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9182449217144187, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9400180064454685, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9255769217104873, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9309426923102619, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9002012094811458, + "sentence_nr": 20 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9690017425712892, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 21 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + 
"score": 0.6924365679057801, + "sentence_nr": 21 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.804543317337012, + "sentence_nr": 21 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 21 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 21 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 21 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8385395593542468, + "sentence_nr": 21 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9515560914045473, + "sentence_nr": 21 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.854435717190483, + "sentence_nr": 21 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7570244995532351, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.6676892344393273, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.873135905690596, + "sentence_nr": 21 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 
0.6885773376269438, + "sentence_nr": 21 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.46961217063286037, + "sentence_nr": 21 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 21 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8379214027434272, + "sentence_nr": 21 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7570244995532351, + "sentence_nr": 21 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.20981645725460496, + "sentence_nr": 21 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.6659995521111991, + "sentence_nr": 21 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7570244995532351, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8388678282825207, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9180596829241628, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9166274634412449, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8626786769008709, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": 
"language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7991709881281639, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8119656541607598, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8872308158649556, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8914910756561332, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 0.927494511055529, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9528614248210486, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 21 + }, + { + "model": 
"meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8523282278495175, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9297633204435644, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9278042759794851, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8998995790099074, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9302677881301988, + "sentence_nr": 21 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9522511234396616, + "sentence_nr": 
22 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 22 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7585159184184324, + "sentence_nr": 22 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8922770448230282, + "sentence_nr": 22 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9126128133576369, + "sentence_nr": 22 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 22 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 22 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.6245412677586388, + "sentence_nr": 22 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.868233862673363, + "sentence_nr": 22 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8852329532489643, + "sentence_nr": 22 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8378994642516495, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8775848642818888, + "sentence_nr": 22 + }, 
+ { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8493237569441244, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9448292727000915, + "sentence_nr": 22 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8555426729178464, + "sentence_nr": 22 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7558344174949267, + "sentence_nr": 22 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8497451239178159, + "sentence_nr": 22 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8493237569441244, + "sentence_nr": 22 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8199763712080639, + "sentence_nr": 22 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 22 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8206722459046871, + "sentence_nr": 22 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.883570112979728, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.914786293186172, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + 
"score": 1.0, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8845568645036501, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8937192042814042, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 0.901348698020278, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8382013802825361, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9219786709510569, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8767649499531999, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": 
"language_modeling", + "metric": "chrf", + "score": 0.9094880423990607, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8719390074611821, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9349020382990011, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9272997117562144, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8962185446474815, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8930034245249151, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9271664513693498, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8936606750264663, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8803360259381345, + "sentence_nr": 22 + 
}, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8680210960657176, + "sentence_nr": 22 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9699436870249787, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9699436870249787, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 23 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7931982206364059, + "sentence_nr": 23 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 23 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 23 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 23 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9699436870249787, + "sentence_nr": 23 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9438398456065387, + "sentence_nr": 23 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9281186022380125, + "sentence_nr": 23 + }, + { + 
"model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9699436870249787, + "sentence_nr": 23 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9184823166209557, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8839868610728687, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8884834862973964, + "sentence_nr": 23 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9300073119656489, + "sentence_nr": 23 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9381606131991436, + "sentence_nr": 23 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8839868610728687, + "sentence_nr": 23 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 23 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9300073119656489, + "sentence_nr": 23 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.4072337657555589, + "sentence_nr": 23 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 23 + 
}, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9184823166209557, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9293646790023864, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9511392272878579, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9293879632586071, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9277950353049101, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8843378183459343, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8741633139531418, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9271525909282003, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9736840552120738, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": 
"language_modeling", + "metric": "chrf", + "score": 0.9396084767892234, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9187563342696414, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8896752045577786, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9737097349915758, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9330058893011377, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9570066548501687, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9201684039669155, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9133901345922595, + "sentence_nr": 23 + }, + { + "model": 
"meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9458636432813123, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 0.917857433142856, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9375412439691305, + "sentence_nr": 23 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9556267474396976, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9489054429933926, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9489054429933926, + "sentence_nr": 24 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8327628422929998, + "sentence_nr": 24 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9249365863966041, + "sentence_nr": 24 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.922528755167094, + "sentence_nr": 24 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + 
"score": 0.9486938895906879, + "sentence_nr": 24 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 24 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 24 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8620685016584069, + "sentence_nr": 24 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9405916043682414, + "sentence_nr": 24 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9327915990783561, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.909738029095061, + "sentence_nr": 24 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.909738029095061, + "sentence_nr": 24 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8707492337114523, + "sentence_nr": 24 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.95883735444933, + "sentence_nr": 24 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 
0.9455007606735264, + "sentence_nr": 24 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9372630850025364, + "sentence_nr": 24 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.5983897920478856, + "sentence_nr": 24 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9299762198228243, + "sentence_nr": 24 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9302303599426779, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9544609413449265, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9355306533611718, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9432457481338326, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9472285181144658, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": 
"language_modeling", + "metric": "chrf", + "score": 0.923828763793418, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9224761498105726, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9756278595118478, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9499594621802195, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9544238060448419, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9012364553153411, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8199585012210312, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9280048312907723, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9587462450914201, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8938919301593574, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9507758066685948, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 
0.9432005035367906, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9675203656708941, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9303385434730891, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9401106918306472, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9533532275954528, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9274629860503822, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8926908826740254, + "sentence_nr": 24 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 25 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": 
"language_modeling", + "metric": "chrf", + "score": 0.6224897798032885, + "sentence_nr": 25 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7893575827661004, + "sentence_nr": 25 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 25 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 25 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 25 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 25 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 25 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9478696521177714, + "sentence_nr": 25 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7160421907140165, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.6217685026572488, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.794919886900137, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8793006100154936, + "sentence_nr": 25 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + 
"metric": "chrf", + "score": 0.5916523997385489, + "sentence_nr": 25 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.4849269488253923, + "sentence_nr": 25 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7462718113811923, + "sentence_nr": 25 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8083701726292805, + "sentence_nr": 25 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.722502153449955, + "sentence_nr": 25 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 25 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.5544920599877754, + "sentence_nr": 25 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.6853792233736985, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9200538056807258, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9630774769374594, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9143443086107108, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9052744049140443, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + 
"bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9018850910676268, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9142574363760879, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9168431011517528, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9141901633008906, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9317477810881586, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9354759108346813, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9141453314674155, + "sentence_nr": 25 + }, + { 
+ "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9550191440621234, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8402328635525613, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 0.831845583109951, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9092382099397807, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9770044719642067, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9155318202784664, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 25 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + 
"sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8272309965382391, + "sentence_nr": 26 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7767725512278205, + "sentence_nr": 26 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9373981486656514, + "sentence_nr": 26 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9579023880929557, + "sentence_nr": 26 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9268329536813669, + "sentence_nr": 26 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.904428807825769, + "sentence_nr": 26 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": 
"meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9116613044583819, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9084279839455062, + "sentence_nr": 26 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8013174743750245, + "sentence_nr": 26 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.34811585804131506, + "sentence_nr": 26 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8912610518101419, + "sentence_nr": 26 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9084279608664247, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": 
"meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9342971539350323, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9618018909441389, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9221850850049388, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": 
"meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9621502301102783, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9207497282487874, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8817316559043479, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9595521389704431, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9604273088099046, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8832167531630292, + 
"sentence_nr": 26 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9618018909441389, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9338423795983638, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 27 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 27 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 27 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 27 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 27 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 27 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 27 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8069582822584229, + "sentence_nr": 27 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9432051372011929, + "sentence_nr": 27 + }, + { + "model": "openai/gpt-4o-mini", + 
"bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8875472267363329, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8538919155402751, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8601111478550084, + "sentence_nr": 27 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8274840531521687, + "sentence_nr": 27 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8737243337458652, + "sentence_nr": 27 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8875472267363329, + "sentence_nr": 27 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 27 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8213297311895551, + "sentence_nr": 27 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.3007622907436899, + "sentence_nr": 27 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 27 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.906379768806771, + "sentence_nr": 27 + }, + { + "model": 
"meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8996352283472103, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8577239523880982, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9705288278234159, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9022302698191352, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9618116705103616, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9282902444420971, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9283062281157928, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9143841728614055, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9148205155364358, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": 
"language_modeling", + "metric": "chrf", + "score": 0.9429357495928096, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8199038085123204, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9007500710615358, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9335504867261654, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8519148326217993, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9467340802817513, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8914166352994622, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8697448206881571, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9091527400737927, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 27 + }, 
+ { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9487286082082608, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9550331732946552, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 27 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9703747509928279, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9540941235545723, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9543144589160125, + "sentence_nr": 28 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.652649628941592, + "sentence_nr": 28 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9767775472269087, + "sentence_nr": 28 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 28 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9393628940364738, + "sentence_nr": 28 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 
0.9410712595774171, + "sentence_nr": 28 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.6444379795256558, + "sentence_nr": 28 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8964898605551818, + "sentence_nr": 28 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9263597385884417, + "sentence_nr": 28 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.899546929868499, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.899546929868499, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9256238040654331, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9256238040654331, + "sentence_nr": 28 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 28 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.824741266541094, + "sentence_nr": 28 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 28 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9256238040654331, + "sentence_nr": 28 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + 
"metric": "chrf", + "score": 0.8283905649271065, + "sentence_nr": 28 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 28 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.881413837458117, + "sentence_nr": 28 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.899546929868499, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9437940294094723, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9269703177791706, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 0.944904344834561, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8479413107328494, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9715595760527852, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8595969327963556, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9538713542813556, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8348508116391393, + "sentence_nr": 28 + }, + { + "model": 
"meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9572462820044535, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9375119517314923, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9493167367596885, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9344916654109876, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9849529115133767, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9275259780895282, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9276874028790393, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9473074618830379, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": 
"chrf", + "score": 0.9460494618521745, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8923268998495886, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9455007606735264, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9526558782357073, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9338345156544289, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8995764072227389, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9505226544098013, + "sentence_nr": 28 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9630841609539229, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9451142647196181, + "sentence_nr": 29 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7510122845400926, + "sentence_nr": 29 + }, + { + "model": 
"mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8090165300577936, + "sentence_nr": 29 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 29 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 29 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 29 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9543128468386116, + "sentence_nr": 29 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 29 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 29 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.920197561569537, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8759929746436435, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 29 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8935424392990651, + "sentence_nr": 29 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + 
"task": "language_modeling", + "metric": "chrf", + "score": 0.7769676399488106, + "sentence_nr": 29 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8875472267363329, + "sentence_nr": 29 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 29 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 29 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8664932988313133, + "sentence_nr": 29 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.906379768806771, + "sentence_nr": 29 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8001297194719582, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9048724843551281, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8947987168857687, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9489238765618674, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.945278116491169, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 29 + }, + { + "model": 
"meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8989194854163256, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9061728639858796, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9501419212325259, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 0.891206254843651, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9331628274049639, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9046319474149982, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 
0.9463095328863311, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9085828484030862, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8856061163721227, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9661878700572512, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 0.908669313428767, + "sentence_nr": 29 + } + ] +} \ No newline at end of file